implement device-side RNG #365

simeonschaub · 2025-09-06T17:33:53Z

Requires ~~JuliaGPU/GPUCompiler.jl#717~~ JuliaGPU/GPUCompiler.jl#718

codecov · 2025-09-07T14:15:36Z

Codecov Report

❌ Patch coverage is 93.87755% with 3 lines in your changes missing coverage. Please review.
✅ Project coverage is 79.86%. Comparing base (5d317c4) to head (449ca22).
⚠️ Report is 1 commits behind head on master.

Files with missing lines	Patch %	Lines
src/compiler/compilation.jl	93.33%	3 Missing ⚠️

Additional details and impacted files

@@            Coverage Diff             @@
##           master     #365      +/-   ##
==========================================
+ Coverage   79.01%   79.86%   +0.84%     
==========================================
  Files          12       12              
  Lines         672      720      +48     
==========================================
+ Hits          531      575      +44     
- Misses        141      145       +4

☔ View full report in Codecov by Sentry.
📢 Have feedback on the report? Share it here.

🚀 New features to boost your workflow:

❄️ Test Analytics: Detect flaky tests, report on failures, and find test suite problems.

also fix testsuite setup

partially by making use of JuliaGPU/GPUCompiler.jl#727

github-actions · 2025-10-07T15:20:47Z

Your PR requires formatting changes to meet the project's style guidelines.
Please consider running Runic (git runic master) to apply these changes.

Click here to view the suggested changes.

diff --git a/lib/cl/kernel.jl b/lib/cl/kernel.jl
index 13290e8..d7b5878 100644
--- a/lib/cl/kernel.jl
+++ b/lib/cl/kernel.jl
@@ -119,8 +119,10 @@ function local_size_for_sub_group_count(ki::KernelSubGroupInfo, sub_group_count:
     d = getfield(ki, :device)
     input_value = Ref{Csize_t}(sub_group_count)
     result = Ref{NTuple{3, Csize_t}}()
-    clGetKernelSubGroupInfo(k, d, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT,
-                           sizeof(Csize_t), input_value, sizeof(NTuple{3, Csize_t}), result, C_NULL)
+    clGetKernelSubGroupInfo(
+        k, d, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT,
+        sizeof(Csize_t), input_value, sizeof(NTuple{3, Csize_t}), result, C_NULL
+    )
     return Int.(result[])
 end
 
@@ -135,7 +137,7 @@ function Base.getproperty(ki::KernelSubGroupInfo, s::Symbol)
         return result[]
     end
 
-    if s == :max_sub_group_size
+    return if s == :max_sub_group_size
         Int(get(CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, Csize_t))
     elseif s == :sub_group_count
         Int(get(CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE, Csize_t))
@@ -157,8 +159,9 @@ end
 ## kernel calling
 
 function enqueue_kernel(k::Kernel, global_work_size, local_work_size=nothing;
-                        global_work_offset=nothing, wait_on::Vector{Event}=Event[],
-                        device_rng=false)
+        global_work_offset = nothing, wait_on::Vector{Event} = Event[],
+        device_rng = false
+    )
     max_work_dim = device().max_work_item_dims
     work_dim     = length(global_work_size)
     if work_dim > max_work_dim
@@ -249,7 +252,7 @@ function call(
         k::Kernel, args...; global_size = (1,), local_size = nothing,
         global_work_offset = nothing, wait_on::Vector{Event} = Event[],
         indirect_memory::Vector{AbstractMemory} = AbstractMemory[],
-        device_rng=false,
+        device_rng = false,
     )
     set_args!(k, args...)
     if !isempty(indirect_memory)
@@ -303,7 +306,7 @@ function call(
             clSetKernelExecInfo(k, CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL, sizeof(usm_pointers), usm_pointers)
         end
     end
-    enqueue_kernel(k, global_size, local_size; global_work_offset, wait_on, device_rng)
+    return enqueue_kernel(k, global_size, local_size; global_work_offset, wait_on, device_rng)
 end
 
 # From `julia/base/reflection.jl`, adjusted to add specialization on `t`.
diff --git a/src/compiler/compilation.jl b/src/compiler/compilation.jl
index 4afacc3..5316c57 100644
--- a/src/compiler/compilation.jl
+++ b/src/compiler/compilation.jl
@@ -20,11 +20,15 @@ GPUCompiler.isintrinsic(job::OpenCLCompilerJob, fn::String) =
 
 GPUCompiler.kernel_state_type(::OpenCLCompilerJob) = KernelState
 
-function GPUCompiler.finish_module!(@nospecialize(job::OpenCLCompilerJob),
-                                    mod::LLVM.Module, entry::LLVM.Function)
-    entry = invoke(GPUCompiler.finish_module!,
-                   Tuple{CompilerJob{SPIRVCompilerTarget}, LLVM.Module, LLVM.Function},
-                   job, mod, entry)
+function GPUCompiler.finish_module!(
+        @nospecialize(job::OpenCLCompilerJob),
+        mod::LLVM.Module, entry::LLVM.Function
+    )
+    entry = invoke(
+        GPUCompiler.finish_module!,
+        Tuple{CompilerJob{SPIRVCompilerTarget}, LLVM.Module, LLVM.Function},
+        job, mod, entry
+    )
 
     # if this kernel uses our RNG, we should prime the shared state.
     # XXX: these transformations should really happen at the Julia IR level...
@@ -36,7 +40,7 @@ function GPUCompiler.finish_module!(@nospecialize(job::OpenCLCompilerJob),
 
         # create a deferred compilation job for `initialize_rng_state`
         src = methodinstance(ft, tt, GPUCompiler.tls_world_age())
-        cfg = CompilerConfig(job.config; kernel=false, name=nothing)
+        cfg = CompilerConfig(job.config; kernel = false, name = nothing)
         job = CompilerJob(src, cfg, job.world)
         id = length(GPUCompiler.deferred_codegen_jobs) + 1
         GPUCompiler.deferred_codegen_jobs[id] = job
@@ -44,7 +48,7 @@ function GPUCompiler.finish_module!(@nospecialize(job::OpenCLCompilerJob),
         # generate IR for calls to `deferred_codegen` and the resulting function pointer
         top_bb = first(blocks(entry))
         bb = BasicBlock(top_bb, "initialize_rng")
-        @dispose builder=IRBuilder() begin
+        @dispose builder = IRBuilder() begin
             position!(builder, bb)
             subprogram = LLVM.subprogram(entry)
             if subprogram !== nothing
diff --git a/src/device/random.jl b/src/device/random.jl
index 396b34b..6bbb1d0 100644
--- a/src/device/random.jl
+++ b/src/device/random.jl
@@ -25,7 +25,7 @@ end
 function initialize_rng_state()
     subgroup_id = get_sub_group_id()
     @inbounds global_random_keys()[subgroup_id] = kernel_state().random_seed
-    @inbounds global_random_counters()[subgroup_id] = 0
+    return @inbounds global_random_counters()[subgroup_id] = 0
 end
 
 # generators
@@ -41,7 +41,7 @@ struct Philox2x32{R} <: RandomNumbers.AbstractRNG{UInt64} end
 @inline function Base.getproperty(rng::Philox2x32, field::Symbol)
     subgroup_id = get_sub_group_id()
 
-    if field === :key
+    return if field === :key
         @inbounds global_random_keys()[subgroup_id]
     elseif field === :ctr1
         @inbounds global_random_counters()[subgroup_id]
@@ -69,7 +69,7 @@ end
 Seed the on-device Philox2x32 generator with an UInt32 number.
 Should be called by at least one thread per warp.
 """
-function Random.seed!(rng::Philox2x32, seed::Integer, counter::Integer=UInt32(0))
+function Random.seed!(rng::Philox2x32, seed::Integer, counter::Integer = UInt32(0))
     rng.key = seed % UInt32
     rng.ctr1 = counter
     return
@@ -99,25 +99,57 @@ end
 
 Generate a byte of random data using the on-device Tausworthe generator.
 """
-function Random.rand(rng::Philox2x32{R},::Type{UInt64}) where {R}
+function Random.rand(rng::Philox2x32{R}, ::Type{UInt64}) where {R}
     ctr1, ctr2, key = rng.ctr1, rng.ctr2, rng.key
 
-    if R > 0                               ctr1, ctr2 = philox2x_round(ctr1, ctr2, key); end
-    if R > 1  key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key); end
-    if R > 2  key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key); end
-    if R > 3  key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key); end
-    if R > 4  key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key); end
-    if R > 5  key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key); end
-    if R > 6  key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key); end
-    if R > 7  key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key); end
-    if R > 8  key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key); end
-    if R > 9  key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key); end
-    if R > 10 key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key); end
-    if R > 11 key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key); end
-    if R > 12 key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key); end
-    if R > 13 key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key); end
-    if R > 14 key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key); end
-    if R > 15 key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key); end
+    if R > 0
+        ctr1, ctr2 = philox2x_round(ctr1, ctr2, key)
+    end
+    if R > 1
+        key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key)
+    end
+    if R > 2
+        key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key)
+    end
+    if R > 3
+        key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key)
+    end
+    if R > 4
+        key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key)
+    end
+    if R > 5
+        key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key)
+    end
+    if R > 6
+        key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key)
+    end
+    if R > 7
+        key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key)
+    end
+    if R > 8
+        key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key)
+    end
+    if R > 9
+        key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key)
+    end
+    if R > 10
+        key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key)
+    end
+    if R > 11
+        key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key)
+    end
+    if R > 12
+        key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key)
+    end
+    if R > 13
+        key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key)
+    end
+    if R > 14
+        key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key)
+    end
+    if R > 15
+        key = philox2x_bumpkey(key); ctr1, ctr2 = philox2x_round(ctr1, ctr2, key)
+    end
 
     # update the warp counter
     # NOTE: this performs the same update on every thread in the warp, but each warp writes
@@ -131,16 +163,15 @@ function Random.rand(rng::Philox2x32{R},::Type{UInt64}) where {R}
 end
 
 
-
 # normally distributed random numbers using Ziggurat algorithm
 #
 # copied from Base because we don't support its global tables
 
 # a hacky method of exposing constant tables as constant GPU memory
 function emit_constant_array(name::Symbol, data::AbstractArray{T}) where {T}
-    @dispose ctx=Context() begin
+    return @dispose ctx = Context() begin
         T_val = convert(LLVMType, T)
-        T_ptr = convert(LLVMType, LLVMPtr{T,AS.UniformConstant})
+        T_ptr = convert(LLVMType, LLVMPtr{T, AS.UniformConstant})
 
         # define function and get LLVM module
         llvm_f, _ = create_function(T_ptr)
@@ -156,7 +187,7 @@ function emit_constant_array(name::Symbol, data::AbstractArray{T}) where {T}
         alignment!(gv, 16)
 
         # generate IR
-        @dispose builder=IRBuilder() begin
+        @dispose builder = IRBuilder() begin
             entry = BasicBlock(llvm_f, "entry")
             position!(builder, entry)
 
@@ -167,17 +198,17 @@ function emit_constant_array(name::Symbol, data::AbstractArray{T}) where {T}
             ret!(builder, untyped_ptr)
         end
 
-        call_function(llvm_f, LLVMPtr{T,AS.UniformConstant})
+        call_function(llvm_f, LLVMPtr{T, AS.UniformConstant})
     end
 end
 
 for var in [:ki, :wi, :fi, :ke, :we, :fe]
     val = getfield(Random, var)
     gpu_var = Symbol("gpu_$var")
-    arr_typ = :(CLDeviceArray{$(eltype(val)),$(ndims(val)),AS.UniformConstant})
+    arr_typ = :(CLDeviceArray{$(eltype(val)), $(ndims(val)), AS.UniformConstant})
     @eval @inline @generated function $gpu_var()
         ptr = emit_constant_array($(QuoteNode(var)), $val)
-        Expr(:call, $arr_typ, $(size(val)), ptr)
+        return Expr(:call, $arr_typ, $(size(val)), ptr)
     end
 end
 
@@ -190,17 +221,17 @@ end
         r &= 0x000fffffffffffff
         rabs = Int64(r >> 1) # One bit for the sign
         idx = rabs & 0xFF
-        x = ifelse(r % Bool, -rabs, rabs)*gpu_wi()[idx+1]
-        rabs < gpu_ki()[idx+1] && return x # 99.3% of the time we return here 1st try
+        x = ifelse(r % Bool, -rabs, rabs) * gpu_wi()[idx + 1]
+        rabs < gpu_ki()[idx + 1] && return x # 99.3% of the time we return here 1st try
         # TODO: This code could be outlined once LLVM supports LDS access in recursively-called functions
         @inbounds if idx == 0
             while true
-                xx = -Random.ziggurat_nor_inv_r*log(Random.rand(rng))
+                xx = -Random.ziggurat_nor_inv_r * log(Random.rand(rng))
                 yy = -log(Random.rand(rng))
-                yy+yy > xx*xx &&
-                    return (rabs >> 8) % Bool ? -Random.ziggurat_nor_r-xx : Random.ziggurat_nor_r+xx
+                yy + yy > xx * xx &&
+                    return (rabs >> 8) % Bool ? -Random.ziggurat_nor_r - xx : Random.ziggurat_nor_r + xx
             end
-        elseif (gpu_fi()[idx] - gpu_fi()[idx+1])*Random.rand(rng) + gpu_fi()[idx+1] < exp(-0.5*x*x)
+        elseif (gpu_fi()[idx] - gpu_fi()[idx + 1]) * Random.rand(rng) + gpu_fi()[idx + 1] < exp(-0.5 * x * x)
             return x # return from the triangular area
         else
             @goto retry
@@ -220,12 +251,12 @@ end
     @inbounds begin
         ri &= 0x000fffffffffffff
         idx = ri & 0xFF
-        x = ri*gpu_we()[idx+1]
-        ri < gpu_ke()[idx+1] && return x # 98.9% of the time we return here 1st try
+        x = ri * gpu_we()[idx + 1]
+        ri < gpu_ke()[idx + 1] && return x # 98.9% of the time we return here 1st try
         # TODO: This code could be outlined once LLVM supports LDS access in recursively-called functions
         @inbounds if idx == 0
             return Random.ziggurat_exp_r - log(Random.rand(rng))
-        elseif (gpu_fe()[idx] - gpu_fe()[idx+1])*Random.rand(rng) + gpu_fe()[idx+1] < exp(-x)
+        elseif (gpu_fe()[idx] - gpu_fe()[idx + 1]) * Random.rand(rng) + gpu_fe()[idx + 1] < exp(-x)
             return x # return from the triangular area
         else
             @goto retry
diff --git a/src/device/runtime.jl b/src/device/runtime.jl
index 52acd04..2238db5 100644
--- a/src/device/runtime.jl
+++ b/src/device/runtime.jl
@@ -34,7 +34,7 @@ end
 
 # run-time equivalent
 function additional_arg_value(state, name)
-    @dispose ctx=Context() begin
+    return @dispose ctx = Context() begin
         T_state = convert(LLVMType, state)
 
         # create function
@@ -46,7 +46,7 @@ function additional_arg_value(state, name)
         state_intr_ft = function_type(state_intr)
 
         # generate IR
-        @dispose builder=IRBuilder() begin
+        @dispose builder = IRBuilder() begin
             entry = BasicBlock(llvm_f, "entry")
             position!(builder, entry)
 
diff --git a/test/device/random.jl b/test/device/random.jl
index 2e74268..5cf3d6f 100644
--- a/test/device/random.jl
+++ b/test/device/random.jl
@@ -3,7 +3,7 @@ using Random
 const n = 256
 
 function apply_seed(seed)
-    if seed === missing
+    return if seed === missing
         # should result in different numbers across launches
         Random.seed!()
         # XXX: this currently doesn't work, because of the definition in Base,
@@ -33,8 +33,8 @@ eltypes = [filter(x -> !(x <: Complex), GPUArraysTestSuite.supported_eltypes(CLA
         a = OpenCL.zeros(T, n)
         b = OpenCL.zeros(T, n)
 
-        @opencl global_size=n local_size=n kernel(a, seed)
-        @opencl global_size=n local_size=n kernel(b, seed)
+        @opencl global_size = n local_size = n kernel(a, seed)
+        @opencl global_size = n local_size = n kernel(b, seed)
 
         if seed === nothing || seed === missing
             @test Array(a) != Array(b)
@@ -56,7 +56,7 @@ eltypes = [filter(x -> !(x <: Complex), GPUArraysTestSuite.supported_eltypes(CLA
         a = OpenCL.zeros(T, n)
         b = OpenCL.zeros(T, n)
 
-        @opencl global_size=n local_size=n kernel(a, b, seed)
+        @opencl global_size = n local_size = n kernel(a, b, seed)
 
         @test Array(a) != Array(b)
     end
@@ -67,7 +67,7 @@ eltypes = [filter(x -> !(x <: Complex), GPUArraysTestSuite.supported_eltypes(CLA
             function kernel(A::AbstractArray{T}, seed) where {T}
                 apply_seed(seed)
                 id = get_local_id(1) * get_local_id(2) * get_local_id(3) *
-                     get_group_id(1) * get_group_id(2) * get_group_id(3)
+                    get_group_id(1) * get_group_id(2) * get_group_id(3)
                 if 1 <= id <= length(A)
                     A[id] = rand(T)
                 end
@@ -75,10 +75,10 @@ eltypes = [filter(x -> !(x <: Complex), GPUArraysTestSuite.supported_eltypes(CLA
             end
 
             tx, ty, tz, bx, by, bz = [dim == active_dim ? 3 : 1 for dim in 1:6]
-            gx, gy, gz = tx*bx, ty*by, tz*bz
+            gx, gy, gz = tx * bx, ty * by, tz * bz
             a = OpenCL.zeros(T, 3)
 
-            @opencl local_size=(tx, ty, tz) global_size=(gx, gy, gz) kernel(a, seed)
+            @opencl local_size = (tx, ty, tz) global_size = (gx, gy, gz) kernel(a, seed)
 
             # NOTE: we don't just generate two numbers and compare them, instead generating a
             #       couple more and checking they're not all the same, in order to avoid
@@ -99,8 +99,8 @@ end
     a = OpenCL.zeros(T, n)
     b = OpenCL.zeros(T, n)
 
-    @opencl global_size=n local_size=n kernel(a, seed)
-    @opencl global_size=n local_size=n kernel(b, seed)
+    @opencl global_size = n local_size = n kernel(a, seed)
+    @opencl global_size = n local_size = n kernel(b, seed)
 
     if seed === nothing || seed === missing
         @test Array(a) != Array(b)
@@ -120,8 +120,8 @@ end
     a = OpenCL.zeros(T, n)
     b = OpenCL.zeros(T, n)
 
-    @opencl global_size=n local_size=n kernel(a, seed)
-    @opencl global_size=n local_size=n kernel(b, seed)
+    @opencl global_size = n local_size = n kernel(a, seed)
+    @opencl global_size = n local_size = n kernel(b, seed)
 
     if seed === nothing || seed === missing
         @test Array(a) != Array(b)
diff --git a/test/setup.jl b/test/setup.jl
index 90337d3..22e5f43 100644
--- a/test/setup.jl
+++ b/test/setup.jl
@@ -91,7 +91,7 @@ function runtests(f, name, platform_filter)
 
         # some tests require native execution capabilities
         requires_il = name in ["atomics", "execution", "intrinsics", "kernelabstractions"] ||
-                      startswith(name, "gpuarrays/") || startswith(name, "device/")
+            startswith(name, "gpuarrays/") || startswith(name, "device/")
 
         ex = quote
             GC.gc(true)

maleadt · 2025-10-08T10:51:59Z

I don't like the additional argument passing. Isn't this because we currently only support dynamically-sized local memory? IIUC it should be possible to define it entirely in the module, like such:

__local float localBuffer[1024];

simeonschaub · 2025-10-08T11:42:39Z

The issue is that to know how much memory we are going to need, we need to query the number of subgroups inside a workgroup. I don't think we can do that for local memory declared inside a module.

simeonschaub closed this Sep 7, 2025

simeonschaub reopened this Sep 7, 2025

simeonschaub mentioned this pull request Sep 9, 2025

add_input_arguments! for other backends JuliaGPU/GPUCompiler.jl#718

Merged

simeonschaub added 19 commits October 7, 2025 13:27

wip: device-side rng

6e15a65

some things work!

ddb3eae

it's getting there!

e3e2d4f

wip

82db0de

wip

85cb656

wip

dd23a73

it works!!!!!!

09a9cc2

only add rng state if needed

a2a270b

some cleanup

9146d30

add some tests

6426cc8

fix global_id calculation

9337180

also fix testsuite setup

[revert me] checkout GPUCompiler devbranch on CI

ee1207e

adapt to JuliaGPU/GPUCompiler.jl#718

e893726

fixup!

d21c0dc

adapt to custom_intr rename

84e5d3d

add Float16 math functions

385d32b

no fallback to Float64 for randn and randexp

da6a5b3

adapt to GPUCompiler changes

767877d

change GPUCompiler branch

a767801

simeonschaub force-pushed the sds/rng branch from 3264565 to 17892c2 Compare October 7, 2025 11:27

simplify implementation

aca6d1c

partially by making use of JuliaGPU/GPUCompiler.jl#727

simeonschaub force-pushed the sds/rng branch from 17892c2 to aca6d1c Compare October 7, 2025 11:29

simeonschaub mentioned this pull request Oct 7, 2025

New interface: process_linked_module! JuliaGPU/GPUCompiler.jl#727

Merged

simeonschaub added 2 commits October 7, 2025 15:53

fix Bool tests

38b32b0

use released GPUCompiler

449ca22

simeonschaub marked this pull request as ready for review October 7, 2025 13:57

vchuravy mentioned this pull request Oct 7, 2025

[pocl] Device side RNG JuliaGPU/KernelAbstractions.jl#641

Open

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

implement device-side RNG #365

implement device-side RNG #365

Uh oh!

simeonschaub commented Sep 6, 2025 •

edited

Loading

Uh oh!

codecov bot commented Sep 7, 2025 •

edited

Loading

Uh oh!

github-actions bot commented Oct 7, 2025

Uh oh!

maleadt commented Oct 8, 2025

Uh oh!

simeonschaub commented Oct 8, 2025

Uh oh!

Uh oh!

implement device-side RNG #365

Are you sure you want to change the base?

implement device-side RNG #365

Uh oh!

Conversation

simeonschaub commented Sep 6, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

codecov bot commented Sep 7, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Codecov Report

Uh oh!

github-actions bot commented Oct 7, 2025

Uh oh!

maleadt commented Oct 8, 2025

Uh oh!

simeonschaub commented Oct 8, 2025

Uh oh!

Uh oh!

simeonschaub commented Sep 6, 2025 •

edited

Loading

codecov bot commented Sep 7, 2025 •

edited

Loading