-
Notifications
You must be signed in to change notification settings - Fork 59
Open
Description
I'm not sure if these make much sense in the context of GPU compilation, but here's at least a sketch of what initial support could look like:
using GPUCompiler, LLVM
include("test/definitions/native.jl")
## GPU-compatible opaque closures
using Core.Compiler: IRCode
using Core: CodeInfo, MethodInstance, CodeInstance, LineNumberNode
"""
    OpaqueGPUClosure{F, E, A, R}

Handle to a closure that was registered for deferred GPU code generation.

All information except the captured environment lives in the type parameters:

- `F`: the closure's function identifier (the constructor in this file stores the
  key of the job in `GPUCompiler.deferred_codegen_jobs` here)
- `E`: type of the captured environment `env`
- `A`: argument signature
- `R`: return type
"""
struct OpaqueGPUClosure{F, E, A, R}
    env::E
end
"""
    compute_ir_rettype(ir::IRCode)

Compute the return type of `ir` by merging the types of every value that is
returned from it, and widening the result to a plain Julia type.

Returns `Union{}` when `ir` contains no return statement carrying a value.
"""
function compute_ir_rettype(ir::IRCode)
    CC = Core.Compiler
    merged = Union{}
    for idx in 1:length(ir.stmts)
        node = ir.stmts[idx][:inst]
        # only return statements contribute to the return type ...
        node isa CC.ReturnNode || continue
        # ... and only those that actually have a value attached
        isdefined(node, :val) || continue
        merged = CC.tmerge(CC.argextype(node.val, ir), merged)
    end
    return CC.widenconst(merged)
end
"""
    compute_oc_signature(ir::IRCode, nargs::Int, isva::Bool)

Build the `Tuple` type describing the arguments of the closure represented by `ir`.

The first entry of `ir.argtypes` is skipped; the following `nargs` entries are
widened and used as the signature's parameters. When `isva` is set, the last
argument is expanded: if it is a `Tuple` type its parameters are spliced into the
signature, otherwise it is replaced by `Vararg{Any}`.
"""
function compute_oc_signature(ir::IRCode, nargs::Int, isva::Bool)
    params = Vector{Any}(undef, nargs)
    for k in eachindex(params)
        # offset by one: `ir.argtypes[1]` is not part of the call signature
        params[k] = Core.Compiler.widenconst(ir.argtypes[k + 1])
    end
    isva || return Tuple{params...}

    # varargs: replace the trailing argument by its expansion
    trailing = pop!(params)
    if trailing <: Tuple
        append!(params, trailing.parameters)
    else
        push!(params, Vararg{Any})
    end
    return Tuple{params...}
end
"""
    OpaqueGPUClosure(config::CompilerConfig, ir::IRCode, env...; isva=false)

Create an [`OpaqueGPUClosure`](@ref) from typed IR.

The signature and return type are derived from `ir` itself (see
`compute_oc_signature` and `compute_ir_rettype`); the IR is then converted to a
`CodeInfo` and handed to `generate_opaque_closure` together with `config` and the
captured environment `env`.

The caller's `ir` is not modified: all work happens on a copy.
"""
function OpaqueGPUClosure(config::CompilerConfig, ir::IRCode, @nospecialize env...;
                          isva::Bool = false)
    # NOTE: we need ir.argtypes[1] == typeof(env)
    ir = Core.Compiler.copy(ir)

    # one slot per entry of `ir.argtypes`; the first is not a call argument
    nslots = length(ir.argtypes)
    nargs = nslots - 1

    sig = compute_oc_signature(ir, nargs, isva)
    rt = compute_ir_rettype(ir)

    # start from an uninitialized CodeInfo and fill in the slot metadata by hand
    src = ccall(:jl_new_code_info_uninit, Ref{CodeInfo}, ())
    src.slotnames = fill(:none, nslots)
    src.slotflags = zeros(UInt8, nslots)
    src.slottypes = copy(ir.argtypes)
    src.rettype = rt

    return generate_opaque_closure(config, Core.Compiler.ir_to_codeinf!(src, ir),
                                   sig, rt, nargs, isva, env...)
end
"""
    generate_opaque_closure(config, src, sig, rt, nargs, isva, env...;
                            mod=@__MODULE__, line=0, file=nothing)

Register `src` for deferred GPU code generation and return an `OpaqueGPUClosure`
that refers to it.

# Arguments
- `config::CompilerConfig`: configuration used for the closure's `CompilerJob`.
- `src::CodeInfo`: the code of the closure.
- `sig`: `Tuple` type of the call arguments (without the environment).
- `rt`: return type of the closure.
- `nargs::Int`: number of call arguments (without the environment).
- `isva::Bool`: whether the closure is variadic.
- `env...`: captured environment; stored in the returned closure.

# Keywords
- `mod::Module`: module the synthesized method is created in.
- `line::Int`, `file`: source location recorded on the synthesized method; a
  `nothing` file is recorded as the empty symbol.

# Returns
`OpaqueGPUClosure{id, typeof(env), sig, rt}(env)`, where `id` is the key under
which the compiler job was stored in `GPUCompiler.deferred_codegen_jobs`.
"""
function generate_opaque_closure(config::CompilerConfig, src::CodeInfo,
                                 @nospecialize(sig), @nospecialize(rt),
                                 nargs::Int, isva::Bool, @nospecialize env...;
                                 mod::Module=@__MODULE__,
                                 line::Int=0,
                                 file::Union{Nothing,Symbol}=nothing)
    # create a method (like `jl_make_opaque_closure_method`); honor the `mod`
    # keyword instead of always creating the method in `Main`
    meth = ccall(:jl_new_method_uninit, Ref{Method}, (Any,), mod)
    meth.sig = Tuple
    meth.isva = isva
    meth.is_for_opaque_closure = 0 # XXX: this drives heuristics, some of which we want, others we don't
    meth.name = Symbol("opaque gpu closure")
    meth.nargs = nargs + 1 # the environment occupies the first argument slot
    meth.file = something(file, Symbol())
    meth.line = line
    ccall(:jl_method_set_source, Nothing, (Any, Any), meth, src)

    # look up a method instance and create a compiler job
    full_sig = Tuple{typeof(env), sig.parameters...}
    mi = ccall(:jl_specializations_get_linfo, Ref{MethodInstance}, (Any, Any, Any),
               meth, full_sig, Core.svec())
    job = CompilerJob(mi, config)

    # create a code instance and store it in the cache
    # NOTE(review): the positional layout of the `CodeInstance` constructor is
    # specific to the Julia version this was written against -- confirm on upgrade
    ci = CodeInstance(mi, rt, C_NULL, src, Int32(0), meth.primary_world, typemax(UInt),
                      UInt32(0), UInt32(0), nothing, UInt8(0))
    Core.Compiler.setindex!(GPUCompiler.ci_cache(job), ci, mi)

    # register the job; its key is what the closure's call overload passes to
    # `deferred_codegen`
    id = length(GPUCompiler.deferred_codegen_jobs) + 1
    GPUCompiler.deferred_codegen_jobs[id] = job

    return OpaqueGPUClosure{id, typeof(env), sig, rt}(env)
end
# Call overload for the opaque closure.
#
# The job identifier `F` carried in the closure's type is passed to the
# `deferred_codegen` intrinsic, which yields a pointer to the compiled function;
# that pointer is then called directly.
#
# NOTE(review): the call ABI is hard-coded to `(Int, Int) -> Int`; the closure's
# `A` and `R` type parameters and its `env` are not consulted here.
function (oc::OpaqueGPUClosure{F})(a, b) where {F}
    fptr = ccall("extern deferred_codegen", llvmcall, Ptr{Cvoid}, (Int,), F)
    # promise the optimizer that the pointer is valid
    LLVM.Interop.assume(fptr != C_NULL)
    return ccall(fptr, Int, (Int, Int), a, b)
end
## demo
"""
    kernel(oc, c, a, b)

Demo kernel: load one value each from the pointers `a` and `b`, combine them by
calling `oc`, and store the result through the pointer `c`. Returns `nothing`.
"""
function kernel(oc, c, a, b)
    lhs = unsafe_load(a)
    rhs = unsafe_load(b)
    unsafe_store!(c, oc(lhs, rhs))
    return
end
"""
    main()

Demo driver: wrap the typed IR of `+(::Int, ::Int)` in an `OpaqueGPUClosure`,
compile `kernel` specialized on that closure for the native target, and print
the resulting LLVM IR.
"""
function main()
    target = NativeCompilerTarget()
    params = TestCompilerParams()

    # the closure itself is compiled as a regular (non-kernel) function
    ir, _ = only(Base.code_ircode(+, (Int, Int)))
    closure_config = CompilerConfig(target, params; kernel=false)
    oc = OpaqueGPUClosure(closure_config, ir)

    GPUCompiler.JuliaContext() do ctx
        source = methodinstance(typeof(kernel),
                                Tuple{typeof(oc), Ptr{Int}, Ptr{Int}, Ptr{Int}})
        # use a separate name rather than rebinding the outer configuration from
        # inside the `do` block, which would turn it into a boxed capture
        kernel_config = CompilerConfig(target, params)
        job = CompilerJob(source, kernel_config)
        println(GPUCompiler.compile(:llvm, job; ctx)[1])
    end
end
isinteractive() || main()
These don't actually implement the OpaqueClosure semantics and features (world freezing, env/varargs support, etc.), and can only be constructed from typed IR, but it's a start at least. For my use case, I only need to inline typed IR, so it doesn't make sense to accurately implement the OpaqueClosure semantics. That use case works nicely though:
define void @_Z6kernel16OpaqueGPUClosureILi1E5TupleS0_I5Int64S1_ES1_EPS1_PS1_PS1_(i64 zeroext %0, i64 zeroext %1, i64 zeroext %2) local_unnamed_addr #0 !dbg !64 {
top:
%3 = inttoptr i64 %2 to i64*, !dbg !68
%4 = load i64, i64* %3, align 1, !dbg !68, !tbaa !73, !alias.scope !77, !noalias !80
%5 = inttoptr i64 %1 to i64*, !dbg !68
%6 = load i64, i64* %5, align 1, !dbg !68, !tbaa !73, !alias.scope !77, !noalias !80
%7 = call i64 @julia_opaque_gpu_closure_487(i64 %6, i64 %4), !dbg !85
%8 = inttoptr i64 %0 to i64*, !dbg !87
store i64 %7, i64* %8, align 1, !dbg !87, !tbaa !73, !alias.scope !77, !noalias !80
ret void, !dbg !90
}
define i64 @julia_opaque_gpu_closure_487(i64 signext %0, i64 signext %1) local_unnamed_addr #0 !dbg !91 {
top:
%2 = add i64 %1, %0, !dbg !93
ret i64 %2, !dbg !93
}Metadata
Metadata
Assignees
Labels
No labels