Skip to content

Commit 494a01c

Browse files
authored
Adapt to GPUCompiler 0.15 changes (#1488)
* Bump GPUCompiler. * Adapt to FunctionSpec change: GPUCompiler no longer knows about the function instance, so construct the HostKernel at the point where the instance is still available.
1 parent 056a526 commit 494a01c

File tree

5 files changed

+15
-10
lines changed

5 files changed

+15
-10
lines changed

Manifest.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,9 +89,9 @@ version = "8.3.2"
8989

9090
[[GPUCompiler]]
9191
deps = ["ExprTools", "InteractiveUtils", "LLVM", "Libdl", "Logging", "TimerOutputs", "UUIDs"]
92-
git-tree-sha1 = "556190e1e0ea3e37d83059fc9aa576f1e2104375"
92+
git-tree-sha1 = "05374e47bb136db517b33f62fbe852adf8deb0be"
9393
uuid = "61eb1bfa-7361-4325-ad38-22787b887f55"
94-
version = "0.14.1"
94+
version = "0.15.1"
9595

9696
[[InteractiveUtils]]
9797
deps = ["Markdown"]

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ BFloat16s = "0.2"
3333
CEnum = "0.2, 0.3, 0.4"
3434
ExprTools = "0.1"
3535
GPUArrays = "8.3.2"
36-
GPUCompiler = "0.14"
36+
GPUCompiler = "0.15.1"
3737
LLVM = "4.5.3"
3838
Random123 = "1.2"
3939
RandomNumbers = "1.5.3"

src/compiler/execution.jl

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,10 @@ end
208208

209209
## host-side kernels
210210

211+
# XXX: storing the function instance, but not the arguments, is inconsistent.
212+
# either store the instance and args, making this object directly callable,
213+
# or store neither and cache it when getting it directly from GPUCompiler.
214+
211215
struct HostKernel{F,TT} <: AbstractKernel{F,TT}
212216
f::F
213217
ctx::CuContext
@@ -294,9 +298,10 @@ when function changes, or when different types or keyword arguments are provided
294298
target = CUDACompilerTarget(cuda.device; kwargs...)
295299
params = CUDACompilerParams()
296300
job = CompilerJob(target, source, params)
297-
return GPUCompiler.cached_compilation(cache, job,
298-
cufunction_compile,
299-
cufunction_link)::HostKernel{F,tt}
301+
res = GPUCompiler.cached_compilation(cache, job,
302+
cufunction_compile,
303+
cufunction_link)
304+
HostKernel{F,tt}(f, cuda.context, res.mod, res.fun, res.state)
300305
end
301306

302307
# XXX: does this need a lock? we'll only write to it when we have the typeinf lock.
@@ -461,7 +466,7 @@ end
461466
exception_ptr = create_exceptions!(mod)
462467
state = KernelState(exception_ptr)
463468

464-
return HostKernel{typeof(job.source.f),job.source.tt}(job.source.f, ctx, mod, fun, state)
469+
return (; mod, fun, state)
465470
end
466471

467472
function (kernel::HostKernel)(args...; threads::CuDim=1, blocks::CuDim=1, kwargs...)

src/compiler/reflection.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,10 +143,10 @@ function return_type(@nospecialize(func), @nospecialize(tt))
143143
job = CompilerJob(target, source, params)
144144
interp = GPUCompiler.get_interpreter(job)
145145
if VERSION >= v"1.8-"
146-
sig = Base.signature_type(job.source.f, job.source.tt)
146+
sig = Base.signature_type(func, tt)
147147
Core.Compiler.return_type(interp, sig)
148148
else
149-
Core.Compiler.return_type(interp, job.source.f, job.source.tt)
149+
Core.Compiler.return_type(interp, func, tt)
150150
end
151151
end
152152

test/execution.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ end
100100
end)))
101101

102102
@test CUDA.return_type(identity, Tuple{Int}) === Int
103-
@test CUDA.return_type(CUDA.sin, Tuple{Float32}) === Float32
103+
@test CUDA.return_type(sin, Tuple{Float32}) === Float32
104104
@test CUDA.return_type(getindex, Tuple{CuDeviceArray{Float32,1,1},Int32}) === Float32
105105
@test CUDA.return_type(getindex, Tuple{Base.RefValue{Integer}}) === Integer
106106
end

0 commit comments

Comments
 (0)