Skip to content

Commit 7872106

Browse files
committed
Import compiler part from OpenCL
1 parent af3dc37 commit 7872106

File tree

7 files changed

+486
-20
lines changed

7 files changed

+486
-20
lines changed

Project.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ version = "0.9.33"
77
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
88
Atomix = "a9b6321e-bd34-4604-b9c9-b65b8de01458"
99
EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869"
10+
GPUCompiler = "61eb1bfa-7361-4325-ad38-22787b887f55"
1011
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
1112
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
1213
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"

src/pocl.jl

Lines changed: 0 additions & 20 deletions
This file was deleted.

src/pocl/compiler/compilation.jl

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
## gpucompiler interface
2+
3+
# Marker type used to select the OpenCL-specific GPUCompiler methods defined in this file.
struct OpenCLCompilerParams <: AbstractCompilerParams end

# Shorthands for the SPIR-V target combined with the OpenCL compiler parameters.
const OpenCLCompilerConfig = CompilerConfig{SPIRVCompilerTarget, OpenCLCompilerParams}
const OpenCLCompilerJob = CompilerJob{SPIRVCompilerTarget, OpenCLCompilerParams}

# Any job carrying the OpenCL parameters (whatever its target) uses `POCL` as runtime module.
GPUCompiler.runtime_module(::CompilerJob{<:Any, OpenCLCompilerParams}) = POCL

# OpenCL jobs look up methods through the `method_table` binding in scope here.
GPUCompiler.method_table(::OpenCLCompilerJob) = method_table
10+
11+
# Treat the names listed in `opencl_builtins` as intrinsics, on top of whatever the
# generic SPIR-V method already accepts.
# TODO: eagerly lower these using the translator API
function GPUCompiler.isintrinsic(job::OpenCLCompilerJob, fn::String)
    # `invoke` selects the less specific SPIR-V method instead of recursing into this one
    generic = invoke(GPUCompiler.isintrinsic,
                     Tuple{CompilerJob{SPIRVCompilerTarget}, typeof(fn)},
                     job, fn)
    return generic || in(fn, opencl_builtins)
end
18+
19+
20+
## compiler implementation (cache, configure, compile, and link)
21+
22+
# compilation caches, one per context; entries are created lazily by `compiler_cache`
const _compiler_caches = Dict{cl.Context, Dict{Any, Any}}()

"""
    compiler_cache(ctx::cl.Context)

Return the compilation cache registered for `ctx`, inserting an empty one on first use.
"""
function compiler_cache(ctx::cl.Context)
    return get!(_compiler_caches, ctx) do
        Dict{Any, Any}()
    end
end
32+
33+
# compiler configurations, keyed by a hash of the device and the keyword arguments
const _toolchain = Ref{Any}()
const _compiler_configs = Dict{UInt, OpenCLCompilerConfig}()

"""
    compiler_config(dev::cl.Device; kwargs...)

Return the compiler configuration for `dev` and `kwargs`. The configuration is built by
`_compiler_config` the first time a given device/keyword combination is requested and
looked up by hash afterwards.
"""
function compiler_config(dev::cl.Device; kwargs...)
    key = hash(dev, hash(kwargs))
    return get!(_compiler_configs, key) do
        _compiler_config(dev; kwargs...)
    end
end
45+
# Build a fresh configuration for `dev`. `kernel`, `name` and `always_inline` go to the
# `CompilerConfig`; any other keyword argument is forwarded to the SPIR-V target.
@noinline function _compiler_config(dev; kernel=true, name=nothing, always_inline=false, kwargs...)
    # floating-point support is derived from the extensions the device reports
    has_extension(ext) = ext in dev.extensions
    supports_fp16 = has_extension("cl_khr_fp16")
    supports_fp64 = has_extension("cl_khr_fp64")

    # create GPUCompiler objects
    target = SPIRVCompilerTarget(; supports_fp16, supports_fp64, kwargs...)
    return CompilerConfig(target, OpenCLCompilerParams(); kernel, name, always_inline)
end
54+
55+
# Compile `job` with GPUCompiler's `:obj` output format and return a named tuple holding
# the resulting object (`obj`) and the name of the entry point (`entry`).
function compile(@nospecialize(job::CompilerJob))
    # TODO: this creates a context; cache those.
    obj, meta = JuliaContext() do ctx
        GPUCompiler.compile(:obj, job)
    end

    entry = LLVM.name(meta.entry)
    return (; obj, entry)
end
64+
65+
# Turn the output of `compile` into an executable kernel: create a program from the
# compiled object, build it, and look up the entry point.
function link(@nospecialize(job::CompilerJob), compiled)
    # the compiled object is handed to the driver as an IL program, which requires
    # the `cl_khr_il_program` extension
    if !("cl_khr_il_program" in cl.device().extensions)
        error("Your device does not support SPIR-V, which is currently required for native execution.")
    end

    prog = cl.Program(compiled.obj, context())
    cl.build!(prog)
    return cl.Kernel(prog, compiled.entry)
end

src/pocl/compiler/execution.jl

Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
1+
export @opencl, clfunction, clconvert
2+
3+
4+
## high-level @opencl interface
5+
6+
# keyword arguments interpreted by the `@opencl` macro itself
const MACRO_KWARGS = Symbol[:launch]
# keyword arguments forwarded to `clfunction` (i.e. to the compiler configuration)
const COMPILER_KWARGS = Symbol[:kernel, :name, :always_inline]
# keyword arguments forwarded to the kernel invocation
const LAUNCH_KWARGS = Symbol[:global_size, :local_size, :queue]
9+
10+
"""
    @opencl [kwargs...] f(args...)

Convert `f` and `args` with `clconvert`, compile the call with `clfunction`, and (unless
`launch=false` is given) invoke the resulting kernel. The expression evaluates to the
kernel object.

Keyword arguments precede the call and are either `key=value` or a bare symbol `key`
(shorthand for `key=key`). They are grouped as follows:

- `MACRO_KWARGS` (`launch`): must be a literal `Bool`; handled by the macro itself.
- `COMPILER_KWARGS`: forwarded to `clfunction`.
- `LAUNCH_KWARGS`: forwarded to the kernel invocation; not allowed with `launch=false`.

Any other keyword, or a last argument that is not a function call, raises an
`ArgumentError` at macro-expansion time.
"""
macro opencl(ex...)
    # without any argument there is no call to destructure; fail with a clear message
    # instead of a BoundsError from `ex[end]`
    isempty(ex) &&
        throw(ArgumentError("@opencl requires a function call as its last argument"))

    call = ex[end]
    kwargs = map(ex[1:end-1]) do kwarg
        if kwarg isa Symbol
            # bare symbol: shorthand for `kwarg = kwarg`
            :($kwarg = $kwarg)
        elseif Meta.isexpr(kwarg, :(=))
            kwarg
        else
            throw(ArgumentError("Invalid keyword argument '$kwarg'"))
        end
    end

    # destructure the kernel call
    Meta.isexpr(call, :call) ||
        throw(ArgumentError("last argument to @opencl should be a function call"))
    f = call.args[1]
    args = call.args[2:end]

    code = quote end
    vars, var_exprs = assign_args!(code, args)

    # group keyword argument
    macro_kwargs, compiler_kwargs, call_kwargs, other_kwargs =
        split_kwargs(kwargs, MACRO_KWARGS, COMPILER_KWARGS, LAUNCH_KWARGS)
    if !isempty(other_kwargs)
        key = first(other_kwargs).args[1]
        throw(ArgumentError("Unsupported keyword argument '$key'"))
    end

    # handle keyword arguments that influence the macro's behavior
    launch = true
    for kwarg in macro_kwargs
        key, val = kwarg.args
        if key == :launch
            # the value is inspected at expansion time, so it has to be a literal
            isa(val, Bool) || throw(ArgumentError("`launch` keyword argument to @opencl should be a constant value"))
            launch = val::Bool
        else
            throw(ArgumentError("Unsupported keyword argument '$key'"))
        end
    end
    if !launch && !isempty(call_kwargs)
        error("@opencl with launch=false does not support launch-time keyword arguments; use them when calling the kernel")
    end

    # FIXME: macro hygiene wrt. escaping kwarg values (this broke with 1.5)
    #        we esc() the whole thing now, necessitating gensyms...
    @gensym f_var kernel_f kernel_args kernel_tt kernel

    # convert the arguments, call the compiler and launch the kernel
    # while keeping the original arguments alive
    push!(code.args,
        quote
            $f_var = $f
            GC.@preserve $(vars...) $f_var begin
                $kernel_f = $clconvert($f_var)
                $kernel_args = map($clconvert, ($(var_exprs...),))
                $kernel_tt = Tuple{map(Core.Typeof, $kernel_args)...}
                $kernel = $clfunction($kernel_f, $kernel_tt; $(compiler_kwargs...))
                if $launch
                    $kernel($(var_exprs...); $(call_kwargs...))
                end
                $kernel
            end
        end)

    # wrap in `let` so the generated temporaries do not leak into the caller's scope
    return esc(quote
        let
            $code
        end
    end)
end
80+
81+
82+
## argument conversion
83+
84+
# Adapt.jl adaptor used by `clconvert`. While converting, it records every SVM pointer
# it comes across in `svm_pointers`.
struct KernelAdaptor
    svm_pointers::Vector{Ptr{Cvoid}}
end

# assume directly-passed pointers are SVM pointers: record them and pass them through
function Adapt.adapt_storage(to::KernelAdaptor, ptr::Ptr)
    push!(to.svm_pointers, ptr)
    return ptr
end

# convert SVM buffers to their GPU address, recording that address as well
function Adapt.adapt_storage(to::KernelAdaptor, buf::cl.SVMBuffer)
    address = pointer(buf)
    push!(to.svm_pointers, address)
    return address
end
100+
101+
# Base.RefValue isn't GPU compatible, so provide a compatible alternative:
# an immutable `Ref` wrapping a single value.
# TODO: port improvements from CUDA.jl
struct CLRefValue{T} <: Ref{T}
    x::T
end

Base.getindex(r::CLRefValue) = r.x

# replace a `Base.RefValue` by a `CLRefValue` holding the adapted contents
function Adapt.adapt_structure(to::KernelAdaptor, r::Base.RefValue)
    return CLRefValue(adapt(to, r[]))
end
108+
109+
# broadcast sometimes passes a ref(type), resulting in a GPU-incompatible DataType box.
# avoid that by using a field-less ref that carries the boxed type as a type parameter.
struct CLRefType{T} <: Ref{DataType} end

Base.getindex(::CLRefType{T}) where {T} = T

function Adapt.adapt_structure(to::KernelAdaptor,
                               r::Base.RefValue{<:Union{DataType,Type}})
    return CLRefType{r[]}()
end
115+
116+
# case where type is the function being broadcasted: wrap the type in a closure that
# forwards its arguments to `T`, and adapt the broadcast arguments as usual
function Adapt.adapt_structure(to::KernelAdaptor,
                               bc::Broadcast.Broadcasted{Style, <:Any, Type{T}}) where {Style, T}
    construct = (x...) -> T(x...)
    return Broadcast.Broadcasted{Style}(construct, adapt(to, bc.args), bc.axes)
end
120+
121+
"""
    clconvert(x, [pointers])

This function is called for every argument to be passed to a kernel, allowing it to be
converted to a GPU-friendly format. By default, the function does nothing and returns the
input object `x` as-is.

Do not add methods to this function, but instead extend the underlying Adapt.jl package and
register methods for the `KernelAdaptor` type.

The `pointers` argument is used to collect pointers to indirect SVM buffers, which need to
be registered with OpenCL before invoking the kernel.
"""
function clconvert(arg, pointers::Vector{Ptr{Cvoid}}=Ptr{Cvoid}[])
    # the adaptor appends to `pointers` while it walks `arg`
    return adapt(KernelAdaptor(pointers), arg)
end
137+
138+
139+
140+
## abstract kernel functionality
141+
142+
# Supertype of callable kernel objects; `F` is the type of the kernel function and `TT`
# the tuple type of its arguments.
abstract type AbstractKernel{F,TT} end

# Calling a kernel object: generate a `clcall` that converts every argument with
# `clconvert`, drops arguments that need not be passed, and forwards `call_kwargs`.
@inline @generated function (kernel::AbstractKernel{F,TT})(args...;
                                                           call_kwargs...) where {F,TT}
    sig = Tuple{F, TT.parameters...}    # Base.signature_type with a function type
    # expressions for the function object followed by each converted argument
    args = (:(kernel.f), (:( clconvert(args[$i], svm_pointers) ) for i in 1:length(args))...)

    # filter out ghost arguments that shouldn't be passed
    is_omitted = dt -> isghosttype(dt) || Core.Compiler.isconstType(dt)
    keep = map(!is_omitted, sig.parameters)
    call_t = Type[dt for (dt, kept) in zip(sig.parameters, keep) if kept]
    call_args = Union{Expr,Symbol}[arg for (arg, kept) in zip(args, keep) if kept]

    # replace non-isbits arguments (they should be unused, or compilation would have failed)
    for i in eachindex(call_t)
        isbitstype(call_t[i]) && continue
        call_t[i] = Ptr{Any}
        call_args[i] = :C_NULL
    end

    # finalize types
    call_tt = Base.to_tuple_type(call_t)

    quote
        # filled in by the `clconvert` calls embedded in the argument expressions
        svm_pointers = Ptr{Cvoid}[]
        clcall(kernel.fun, $call_tt, $(call_args...); svm_pointers, call_kwargs...)
    end
end
171+
172+
173+
174+
## host-side kernels
175+
176+
# Kernel object created on the host by `clfunction`.
struct HostKernel{F,TT} <: AbstractKernel{F,TT}
    f::F              # the (converted) kernel function instance
    fun::cl.Kernel    # the compiled and linked OpenCL kernel
end
180+
181+
182+
## host-side API
183+
184+
# serializes compilation and the kernel-instance lookup in `clfunction`
const clfunction_lock = ReentrantLock()

"""
    clfunction(f, tt=Tuple{}; kwargs...)

Compile `f` for the argument tuple type `tt` using the current context and device, and
return a callable `HostKernel{typeof(f),tt}`. Keyword arguments are forwarded to
`compiler_config`. Kernel objects are cached by a hash of the compiled kernel, `f` and `tt`.
"""
function clfunction(f::F, tt::TT=Tuple{}; kwargs...) where {F,TT}
    ctx = cl.context()
    dev = cl.device()

    Base.@lock clfunction_lock begin
        # compile the function
        cache = compiler_cache(ctx)
        source = methodinstance(F, tt)
        config = compiler_config(dev; kwargs...)::OpenCLCompilerConfig
        fun = GPUCompiler.cached_compilation(cache, source, config, compile, link)

        # create a callable object that captures the function instance. we don't need to think
        # about world age here, as GPUCompiler already does and will return a different object
        key = hash(fun, hash(f, hash(tt)))
        kernel = get!(_kernel_instances, key) do
            # create the kernel state object
            HostKernel{F,tt}(f, fun)
        end
        return kernel::HostKernel{F,tt}
    end
end

# cache of kernel instances
const _kernel_instances = Dict{UInt, Any}()

src/pocl/compiler/reflection.jl

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
# code reflection entry-points
2+
3+
#
4+
# code_* replacements
5+
#
6+
7+
# Split keyword arguments at run time for selective forwarding.
# `@opencl` does something similar at parse time, using `GPUCompiler.split_kwargs`.
#
# `kwargs` is any iterable of `key => value` pairs; `wanted` is any collection of keys
# supporting `in` (vector, tuple, set, ...).
#
# Returns `(extracted, remaining)`, two `Dict{Symbol, Any}`: the entries whose key is in
# `wanted`, and all other entries.
function split_kwargs_runtime(kwargs, wanted)
    extracted = Dict{Symbol, Any}()
    remaining = Dict{Symbol, Any}()
    for (key, value) in kwargs
        destination = key in wanted ? extracted : remaining
        destination[key] = value
    end
    return extracted, remaining
end
21+
22+
# Define `code_typed`, `code_warntype`, `code_llvm` and `code_native` wrappers that build
# a compiler job for the current device and forward to the GPUCompiler function of the
# same name.
for method in (:code_typed, :code_warntype, :code_llvm, :code_native)
    # only code_typed doesn't take a io argument
    forwarded = method === :code_typed ? (:job,) : (:io, :job)

    @eval begin
        function $method(io::IO, @nospecialize(func), @nospecialize(types);
                         kernel::Bool=false, kwargs...)
            # compiler keywords configure the job; everything else goes to GPUCompiler
            compiler_kwargs, rest = split_kwargs_runtime(kwargs, COMPILER_KWARGS)
            source = methodinstance(typeof(func), Base.to_tuple_type(types))
            config = compiler_config(cl.device(); kernel, compiler_kwargs...)
            job = CompilerJob(source, config)
            GPUCompiler.$method($(forwarded...); rest...)
        end

        # convenience method writing to `stdout`
        function $method(@nospecialize(func), @nospecialize(types); kwargs...)
            $method(stdout, func, types; kwargs...)
        end
    end
end
39+
40+
41+
42+
#
43+
# @device_code_* functions
44+
#
45+
46+
export @device_code_lowered, @device_code_typed, @device_code_warntype, @device_code_llvm,
47+
@device_code_native, @device_code
48+
49+
# forward to GPUCompiler: bind each `@device_code*` macro name in this module to the
# macro of the same name defined by GPUCompiler
for suffix in ("_lowered", "_typed", "_warntype", "_llvm", "_native", "")
    macro_name = Symbol("@device_code", suffix)
    @eval $macro_name = $(getfield(GPUCompiler, macro_name))
end
56+
57+
58+
#
59+
# other
60+
#
61+
62+
"""
    return_type(f, tt) -> r::Type

Return a type `r` such that `f(args...)::r` where `args::tt`.

The type is inferred with the GPUCompiler interpreter of a job built from the default
compiler configuration of the current device (`cl.device()`).
"""
function return_type(@nospecialize(func), @nospecialize(tt))
    source = methodinstance(typeof(func), tt)
    config = compiler_config(cl.device())
    job = CompilerJob(source, config)
    interp = GPUCompiler.get_interpreter(job)
    sig = Base.signature_type(func, tt)
    return Core.Compiler.return_type(interp, sig)
end

0 commit comments

Comments
 (0)