Skip to content

Support for opaque closures #421

@maleadt

Description

@maleadt

I'm not sure if these make much sense in the context of GPU compilation, but here's at least a sketch of what initial support could look like:

using GPUCompiler, LLVM
include("test/definitions/native.jl")


## GPU-compatible opaque closures

using Core.Compiler: IRCode
using Core: CodeInfo, MethodInstance, CodeInstance, LineNumberNode

# Handle to a GPU-compatible opaque closure. Only the captured environment is stored
# as a field; everything else is carried in the type parameters, as filled in by
# `generate_opaque_closure`:
#   F - key of the closure's compiler job in `GPUCompiler.deferred_codegen_jobs`
#   E - type of the captured environment (`typeof(env)`)
#   A - argument signature of the closure (a `Tuple` type)
#   R - return type of the closure
struct OpaqueGPUClosure{F, E, A, R}    # func, env, args, ret
    env::E
end

# Compute the return type of `ir` by merging the types of every value returned by one
# of its `ReturnNode`s. Return nodes whose `val` is undefined are skipped, so the
# result stays `Union{}` when no statement returns a value. The merged lattice element
# is widened to a plain Julia type before being returned.
function compute_ir_rettype(ir::IRCode)
    merged = Union{}
    for idx = 1:length(ir.stmts)
        node = ir.stmts[idx][:inst]
        node isa Core.Compiler.ReturnNode || continue
        isdefined(node, :val) || continue
        valtype = Core.Compiler.argextype(node.val, ir)
        merged = Core.Compiler.tmerge(valtype, merged)
    end
    return Core.Compiler.widenconst(merged)
end

# Build the argument signature (a `Tuple` type) of an opaque closure from typed IR.
#
# `ir.argtypes[1]` is skipped (the constructor's NOTE expects it to be the environment
# type); the following `nargs` entries are widened to plain Julia types. When `isva` is
# set, the last argument is expanded: a `Tuple` type contributes its parameters,
# anything else becomes `Vararg{Any}`.
function compute_oc_signature(ir::IRCode, nargs::Int, isva::Bool)
    params = Any[Core.Compiler.widenconst(ir.argtypes[k + 1]) for k in 1:nargs]
    if isva
        trailing = pop!(params)
        if trailing <: Tuple
            append!(params, trailing.parameters)
        else
            push!(params, Vararg{Any})
        end
    end
    return Tuple{params...}
end

# Construct an `OpaqueGPUClosure` from typed IR.
#
# The argument signature and return type are derived from `ir`, a copy of the IR is
# converted into a fresh `CodeInfo`, and the result is handed to
# `generate_opaque_closure` together with `config` and the captured `env` values.
# `isva` marks the last IR argument as a varargs slot.
function OpaqueGPUClosure(config::CompilerConfig, ir::IRCode, @nospecialize env...;
                          isva::Bool = false)
    # NOTE: we need ir.argtypes[1] == typeof(env)
    # convert a copy (presumably so the caller's IR is untouched by `ir_to_codeinf!`)
    ir = Core.Compiler.copy(ir)
    nslots = length(ir.argtypes)    # one slot per IR argument
    nargs = nslots - 1              # the first slot is not a call argument
    sig = compute_oc_signature(ir, nargs, isva)
    rt = compute_ir_rettype(ir)

    # populate an uninitialized CodeInfo with the slot metadata, then fill in the body
    codeinfo = ccall(:jl_new_code_info_uninit, Ref{CodeInfo}, ())
    codeinfo.slotnames = fill(:none, nslots)
    codeinfo.slotflags = fill(0x00, nslots)
    codeinfo.slottypes = copy(ir.argtypes)
    codeinfo.rettype = rt
    codeinfo = Core.Compiler.ir_to_codeinf!(codeinfo, ir)

    return generate_opaque_closure(config, codeinfo, sig, rt, nargs, isva, env...)
end

# Create the method, method instance, code instance and deferred compiler job backing
# an `OpaqueGPUClosure`, and return the closure object.
#
# Arguments:
#   config - compiler configuration used for the closure's `CompilerJob`
#   src    - typed `CodeInfo` that becomes the source of the method
#   sig    - argument signature (a `Tuple` type) of the closure
#   rt     - return type of the closure
#   nargs  - number of call arguments (the method gets `nargs + 1` slots)
#   isva   - whether the method is varargs
#   env... - captured environment, stored in the returned closure
# Keywords:
#   mod    - module the method is created in
#   line   - line number recorded on the method
#   file   - file name recorded on the method (empty symbol when `nothing`)
function generate_opaque_closure(config::CompilerConfig, src::CodeInfo,
                                 @nospecialize(sig), @nospecialize(rt),
                                 nargs::Int, isva::Bool, @nospecialize env...;
                                 mod::Module=@__MODULE__,
                                 line::Int=0,
                                 file::Union{Nothing,Symbol}=nothing)
    # create a method (like `jl_make_opaque_closure_method`)
    # NOTE: the method is created in `mod`; previously `Main` was hard-coded here and
    #       the `mod` keyword was silently ignored.
    meth = ccall(:jl_new_method_uninit, Ref{Method}, (Any,), mod)
    meth.sig = Tuple
    meth.isva = isva
    meth.is_for_opaque_closure = 0  # XXX: this drives heuristics, some of which we want, others we don't
    meth.name = Symbol("opaque gpu closure")
    meth.nargs = nargs + 1
    meth.file = something(file, Symbol())
    meth.line = line
    ccall(:jl_method_set_source, Nothing, (Any, Any), meth, src)

    # look up a method instance and create a compiler job
    # (the full signature prepends the environment type to the argument types)
    full_sig = Tuple{typeof(env), sig.parameters...}
    mi = ccall(:jl_specializations_get_linfo, Ref{MethodInstance}, (Any, Any, Any), meth, full_sig, Core.svec())
    job = CompilerJob(mi, config)

    # create a code instance and store it in the cache
    ci = CodeInstance(mi, rt, C_NULL, src, Int32(0), meth.primary_world, typemax(UInt), UInt32(0), UInt32(0), nothing, UInt8(0))
    Core.Compiler.setindex!(GPUCompiler.ci_cache(job), ci, mi)

    # register the job for deferred codegen; its key becomes the closure's `F` parameter
    id = length(GPUCompiler.deferred_codegen_jobs) + 1
    GPUCompiler.deferred_codegen_jobs[id] = job
    return OpaqueGPUClosure{id, typeof(env), sig, rt}(env)
end

# Call overload for the opaque closure: request the function pointer for deferred
# codegen job `F` and invoke it.
# NOTE: the call is hard-coded to two `Int` arguments and an `Int` return value.
function (oc::OpaqueGPUClosure{F})(a, b) where F
    entry = ccall("extern deferred_codegen", llvmcall, Ptr{Cvoid}, (Int,), F)
    # assert to the compiler that the looked-up pointer is non-null
    LLVM.Interop.assume(entry != C_NULL)
    result = ccall(entry, Int, (Int, Int), a, b)
    return result
end


## demo

# Demo kernel: load one value from each of the pointers `a` and `b`, apply `oc` to the
# pair, and store the result through the pointer `c`. Returns `nothing`.
function kernel(oc, c, a, b)
    lhs = unsafe_load(a)
    rhs = unsafe_load(b)
    unsafe_store!(c, oc(lhs, rhs))
    return nothing
end

# Demo driver: wrap the typed IR of `+(::Int, ::Int)` in an `OpaqueGPUClosure`, then
# compile `kernel` specialized on that closure for the native target and print the
# first element of the compilation result.
function main()
    target = NativeCompilerTarget()
    params = TestCompilerParams()

    # the closure itself is compiled as a non-kernel function
    ir, _ = only(Base.code_ircode(+, (Int, Int)))
    oc_config = CompilerConfig(target, params; kernel=false)
    oc = OpaqueGPUClosure(oc_config, ir)

    GPUCompiler.JuliaContext() do ctx
        kernel_tt = Tuple{typeof(oc), Ptr{Int}, Ptr{Int}, Ptr{Int}}
        source = methodinstance(typeof(kernel), kernel_tt)
        job = CompilerJob(source, CompilerConfig(target, params))
        compiled = GPUCompiler.compile(:llvm, job; ctx)
        println(compiled[1])
    end
end

# run the demo only when executed non-interactively (e.g. as a script)
if !isinteractive()
    main()
end

These don't actually implement the OpaqueClosure semantics and features (world freezing, env/varargs support, etc.) and can only be constructed from typed IR, but it's a start at least. For my use case I only need to inline typed IR, so it doesn't make sense to implement the OpaqueClosure semantics accurately. That use case works nicely, though:

define void @_Z6kernel16OpaqueGPUClosureILi1E5TupleS0_I5Int64S1_ES1_EPS1_PS1_PS1_(i64 zeroext %0, i64 zeroext %1, i64 zeroext %2) local_unnamed_addr #0 !dbg !64 {
top:
  %3 = inttoptr i64 %2 to i64*, !dbg !68
  %4 = load i64, i64* %3, align 1, !dbg !68, !tbaa !73, !alias.scope !77, !noalias !80
  %5 = inttoptr i64 %1 to i64*, !dbg !68
  %6 = load i64, i64* %5, align 1, !dbg !68, !tbaa !73, !alias.scope !77, !noalias !80
  %7 = call i64 @julia_opaque_gpu_closure_487(i64 %6, i64 %4), !dbg !85
  %8 = inttoptr i64 %0 to i64*, !dbg !87
  store i64 %7, i64* %8, align 1, !dbg !87, !tbaa !73, !alias.scope !77, !noalias !80
  ret void, !dbg !90
}

define i64 @julia_opaque_gpu_closure_487(i64 signext %0, i64 signext %1) local_unnamed_addr #0 !dbg !91 {
top:
  %2 = add i64 %1, %0, !dbg !93
  ret i64 %2, !dbg !93
}

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions