diff --git a/src/driver.jl b/src/driver.jl index 9e05eb63..3b372e01 100644 --- a/src/driver.jl +++ b/src/driver.jl @@ -43,53 +43,48 @@ end export compile -# NOTE: the keyword arguments to compile/codegen control those aspects of compilation that -# might have to be changed (e.g. set libraries=false when recursing, or set -# strip=true for reflection). What remains defines the compilation job itself, -# and those values are contained in the CompilerJob struct. - # (::CompilerJob) const compile_hook = Ref{Union{Nothing,Function}}(nothing) """ - compile(target::Symbol, job::CompilerJob; kwargs...) - -Compile a function `f` invoked with types `tt` for device capability `cap` to one of the -following formats as specified by the `target` argument: `:julia` for Julia IR, `:llvm` for -LLVM IR and `:asm` for machine code. - -The following keyword arguments are supported: -- `toplevel`: indicates that this compilation is the outermost invocation of the compiler - (default: true) -- `libraries`: link the GPU runtime and `libdevice` libraries (default: true, if toplevel) -- `optimize`: optimize the code (default: true, if toplevel) -- `cleanup`: run cleanup passes on the code (default: true, if toplevel) -- `validate`: enable optional validation of input and outputs (default: true, if toplevel) -- `strip`: strip non-functional metadata and debug information (default: false) -- `only_entry`: only keep the entry function, remove all others (default: false). - This option is only for internal use, to implement reflection's `dump_module`. - -Other keyword arguments can be found in the documentation of [`cufunction`](@ref). + compile(target::Symbol, job::CompilerJob) + +Compile a `job` to one of the following formats as specified by the `target` argument: +`:julia` for Julia IR, `:llvm` for LLVM IR and `:asm` for machine code. """ function compile(target::Symbol, @nospecialize(job::CompilerJob); kwargs...) + # XXX: remove on next major version + if !isempty(kwargs) + Base.depwarn("The GPUCompiler `compile` API does not take keyword arguments anymore. Use CompilerConfig instead.", :compile) + config = CompilerConfig(job.config; kwargs...) + job = CompilerJob(job.source, config) + end + if compile_hook[] !== nothing compile_hook[](job) end - return codegen(target, job; kwargs...) + return compile_unhooked(target, job) end -function codegen(output::Symbol, @nospecialize(job::CompilerJob); toplevel::Bool=true, - libraries::Bool=toplevel, optimize::Bool=toplevel, cleanup::Bool=toplevel, - validate::Bool=toplevel, strip::Bool=false, only_entry::Bool=false, - parent_job::Union{Nothing, CompilerJob}=nothing) +# XXX: remove on next major version +function codegen(output::Symbol, @nospecialize(job::CompilerJob); kwargs...) + if !isempty(kwargs) + Base.depwarn("The GPUCompiler `codegen` function is an internal API. Use `GPUCompiler.compile` (with any kwargs passed to `CompilerConfig`) instead.", :codegen) + config = CompilerConfig(job.config; kwargs...) + job = CompilerJob(job.source, config) + end + compile_unhooked(output, job) +end + +function compile_unhooked(output::Symbol, @nospecialize(job::CompilerJob); kwargs...) if context(; throw_error=false) === nothing error("No active LLVM context. 
Use `JuliaContext()` do-block syntax to create one.") end @timeit_debug to "Validation" begin check_method(job) # not optional - validate && check_invocation(job) + job.config.validate && check_invocation(job) end prepare_job!(job) @@ -97,10 +92,10 @@ function codegen(output::Symbol, @nospecialize(job::CompilerJob); toplevel::Bool ## LLVM IR - ir, ir_meta = emit_llvm(job; libraries, toplevel, optimize, cleanup, only_entry, validate) + ir, ir_meta = emit_llvm(job) if output == :llvm - if strip + if job.config.strip @timeit_debug to "strip debug info" strip_debuginfo!(ir) end @@ -117,7 +112,7 @@ function codegen(output::Symbol, @nospecialize(job::CompilerJob); toplevel::Bool else error("Unknown assembly format $output") end - asm, asm_meta = emit_asm(job, ir; strip, validate, format) + asm, asm_meta = emit_asm(job, ir, format) if output == :asm || output == :obj return asm, (; asm_meta..., ir_meta..., ir) @@ -156,9 +151,14 @@ end const __llvm_initialized = Ref(false) -@locked function emit_llvm(@nospecialize(job::CompilerJob); toplevel::Bool, - libraries::Bool, optimize::Bool, cleanup::Bool, - validate::Bool, only_entry::Bool) +@locked function emit_llvm(@nospecialize(job::CompilerJob); kwargs...) + # XXX: remove on next major version + if !isempty(kwargs) + Base.depwarn("The GPUCompiler `emit_llvm` function is an internal API. Use `GPUCompiler.compile` (with any kwargs passed to `CompilerConfig`) instead.", :emit_llvm) + config = CompilerConfig(job.config; kwargs...) + job = CompilerJob(job.source, config) + end + if !__llvm_initialized[] InitializeAllTargets() InitializeAllTargetInfos() @@ -183,7 +183,8 @@ const __llvm_initialized = Ref(false) entry = finish_module!(job, ir, entry) # deferred code generation - has_deferred_jobs = toplevel && !only_entry && haskey(functions(ir), "deferred_codegen") + has_deferred_jobs = job.config.toplevel && !job.config.only_entry && + haskey(functions(ir), "deferred_codegen") jobs = Dict{CompilerJob, String}(job => entry_fn) if has_deferred_jobs dyn_marker = functions(ir)["deferred_codegen"] @@ -221,8 +222,8 @@ const __llvm_initialized = Ref(false) for dyn_job in keys(worklist) # cached compilation dyn_entry_fn = get!(jobs, dyn_job) do - dyn_ir, dyn_meta = codegen(:llvm, dyn_job; toplevel=false, - parent_job=job) + config = CompilerConfig(dyn_job.config; toplevel=false) + dyn_ir, dyn_meta = codegen(:llvm, CompilerJob(dyn_job; config)) dyn_entry_fn = LLVM.name(dyn_meta.entry) merge!(compiled, dyn_meta.compiled) @assert context(dyn_ir) == context(ir) @@ -258,7 +259,7 @@ const __llvm_initialized = Ref(false) erase!(dyn_marker) end - if libraries + if job.config.toplevel && job.config.libraries # load the runtime outside of a timing block (because it recurses into the compiler) if !uses_julia_runtime(job) runtime = load_runtime(job) @@ -284,7 +285,7 @@ const __llvm_initialized = Ref(false) # mark everything internal except for entrypoints and any exported # global variables. this makes sure that the optimizer can, e.g., # rewrite function signatures. 
- if toplevel + if job.config.toplevel preserved_gvs = collect(values(jobs)) for gvar in globals(ir) if linkage(gvar) == LLVM.API.LLVMExternalLinkage @@ -310,7 +311,7 @@ const __llvm_initialized = Ref(false) # so that we can reconstruct the CompileJob instead of setting it globally end - if optimize + if job.config.toplevel && job.config.optimize @timeit_debug to "optimization" begin optimize!(job, ir; job.config.opt_level) @@ -337,7 +338,7 @@ const __llvm_initialized = Ref(false) entry = functions(ir)[entry_fn] end - if cleanup + if job.config.toplevel && job.config.cleanup @timeit_debug to "clean-up" begin @dispose pb=NewPMPassBuilder() begin add!(pb, RecomputeGlobalsAAPass()) @@ -355,7 +356,7 @@ const __llvm_initialized = Ref(false) # we want to finish the module after optimization, so we cannot do so # during deferred code generation. instead, process the deferred jobs # here. - if toplevel + if job.config.toplevel entry = finish_ir!(job, ir, entry) for (job′, fn′) in jobs @@ -367,7 +368,7 @@ const __llvm_initialized = Ref(false) # replace non-entry function definitions with a declaration # NOTE: we can't do this before optimization, because the definitions of called # functions may affect optimization. - if only_entry + if job.config.only_entry for f in functions(ir) f == entry && continue isdeclaration(f) && continue @@ -377,7 +378,7 @@ const __llvm_initialized = Ref(false) end end - if validate + if job.config.toplevel && job.config.validate @timeit_debug to "Validation" begin check_ir(job, ir) end @@ -390,10 +391,10 @@ const __llvm_initialized = Ref(false) return ir, (; entry, compiled) end -@locked function emit_asm(@nospecialize(job::CompilerJob), ir::LLVM.Module; - strip::Bool, validate::Bool, format::LLVM.API.LLVMCodeGenFileType) +@locked function emit_asm(@nospecialize(job::CompilerJob), ir::LLVM.Module, + format::LLVM.API.LLVMCodeGenFileType) # NOTE: strip after validation to get better errors - if strip + if job.config.strip @timeit_debug to "Debug info removal" strip_debuginfo!(ir) end diff --git a/src/execution.jl b/src/execution.jl index 95fc7a24..9b4940a7 100644 --- a/src/execution.jl +++ b/src/execution.jl @@ -8,12 +8,20 @@ export split_kwargs, assign_args! # split keyword arguments expressions into groups. returns vectors of keyword argument # values, one more than the number of groups (unmatched keywords in the last vector). # intended for use in macros; the resulting groups can be used in expressions. +# can be used at run time, but not in performance critical code. function split_kwargs(kwargs, kw_groups...) kwarg_groups = ntuple(_->[], length(kw_groups) + 1) for kwarg in kwargs # decode - Meta.isexpr(kwarg, :(=)) || throw(ArgumentError("non-keyword argument like option '$kwarg'")) - key, val = kwarg.args + if Meta.isexpr(kwarg, :(=)) + # use in macros + key, val = kwarg.args + elseif kwarg isa Pair{Symbol,<:Any} + # use in functions + key, val = kwarg + else + throw(ArgumentError("non-keyword argument like option '$kwarg'")) + end isa(key, Symbol) || throw(ArgumentError("non-symbolic keyword '$key'")) # find a matching group @@ -182,7 +190,7 @@ end end struct DiskCacheEntry - src::Type # Originally MethodInstance, but upon deserialize they were not uniqued... + src::Type # Originally MethodInstance, but upon deserialize they were not uniqued... 
cfg::CompilerConfig asm end @@ -262,7 +270,16 @@ end obj = linker(job, asm) if ci === nothing - ci = ci_cache_lookup(ci_cache(job), src, world, world)::CodeInstance + ci = ci_cache_lookup(ci_cache(job), src, world, world) + if ci === nothing + error("""Did not find CodeInstance for $job. + + Please make sure that the `compiler` function passed to `cached_compilation` + invokes GPUCompiler with exactly the same configuration as passed to the API. + + Note that you should do this by calling `GPUCompiler.compile`, and not by + using reflection functions (which alter the compiler configuration).""") + end key = (ci, cfg) end cache[key] = obj diff --git a/src/interface.jl b/src/interface.jl index 6de28906..f9c655bf 100644 --- a/src/interface.jl +++ b/src/interface.jl @@ -63,6 +63,9 @@ export CompilerConfig # the configuration of the compiler +const CONFIG_KWARGS = [:kernel, :name, :entry_abi, :always_inline, :opt_level, + :libraries, :optimize, :cleanup, :validate, :strip] + """ CompilerConfig(target, params; kernel=true, entry_abi=:specfunc, name=nothing, always_inline=false) @@ -72,20 +75,27 @@ and `params`. Several keyword arguments can be used to customize the compilation process: -- `kernel`: specifies if the function should be compiled as a kernel, or as a regular - function. This is used to determine the calling convention and for validation purposes. -- `entry_abi`: can be either `:specfunc` the default, or `:func`. `:specfunc` expects the - arguments to be passed in registers, simple return values are returned in registers as - well, and complex return values are returned on the stack using `sret`, the calling - convention is `fastcc`. The `:func` abi is simpler with a calling convention of the first - argument being the function itself (to support closures), the second argument being a - pointer to a vector of boxed Julia values and the third argument being the number of - values, the return value will also be boxed. The `:func` abi will internally call the - `:specfunc` abi, but is generally easier to invoke directly. +- `kernel`: specifies if the function should be compiled as a kernel (the default) or as a + plain function. This toggles certain optimizations, rewrites and validations. - `name`: the name that will be used for the entrypoint function. If `nothing` (the default), the name will be generated automatically. +- `entry_abi`: can be either `:specfunc` (the default), or `:func`. + - `:specfunc` expects the arguments to be passed in registers, simple return values are + returned in registers as well, and complex return values are returned on the stack + using `sret`, the calling convention is `fastcc`. + - The `:func` abi is simpler with a calling convention of the first argument being the + function itself (to support closures), the second argument being a pointer to a vector + of boxed Julia values and the third argument being the number of values, the return + value will also be boxed. The `:func` abi will internally call the `:specfunc` abi, but + is generally easier to invoke directly. - `always_inline` specifies if the Julia front-end should inline all functions into one if possible.
+- `opt_level`: the optimization level to use (default: 2) +- `libraries`: link the GPU runtime and `libdevice` libraries (default: true) +- `optimize`: optimize the code (default: true) +- `cleanup`: run cleanup passes on the code (default: true) +- `validate`: enable optional validation of input and outputs (default: true) +- `strip`: strip non-functional metadata and debug information (default: false) """ struct CompilerConfig{T,P} target::T @@ -96,27 +106,49 @@ struct CompilerConfig{T,P} entry_abi::Symbol always_inline::Bool opt_level::Int - - function CompilerConfig(target::AbstractCompilerTarget, - params::AbstractCompilerParams; - kernel=true, - name=nothing, - entry_abi=:specfunc, - always_inline=false, - opt_level=2) + libraries::Bool + optimize::Bool + cleanup::Bool + validate::Bool + strip::Bool + + # internal + toplevel::Bool + only_entry::Bool + + function CompilerConfig(target::AbstractCompilerTarget, params::AbstractCompilerParams; + kernel=true, name=nothing, entry_abi=:specfunc, toplevel=true, + always_inline=false, opt_level=2, optimize=toplevel, + libraries=toplevel, cleanup=toplevel, validate=toplevel, + strip=false, only_entry=false) if entry_abi ∉ (:specfunc, :func) error("Unknown entry_abi=$entry_abi") end new{typeof(target), typeof(params)}(target, params, kernel, name, entry_abi, - always_inline, opt_level) + always_inline, opt_level, libraries, optimize, + cleanup, validate, strip, toplevel, only_entry) end end # copy constructor -CompilerConfig(cfg::CompilerConfig; target=cfg.target, params=cfg.params, - kernel=cfg.kernel, name=cfg.name, entry_abi=cfg.entry_abi, - always_inline=cfg.always_inline, opt_level=cfg.opt_level) = - CompilerConfig(target, params; kernel, entry_abi, name, always_inline, opt_level) +function CompilerConfig(cfg::CompilerConfig; target=cfg.target, params=cfg.params, + kernel=cfg.kernel, name=cfg.name, entry_abi=cfg.entry_abi, + always_inline=cfg.always_inline, opt_level=cfg.opt_level, + libraries=cfg.libraries, optimize=cfg.optimize, cleanup=cfg.cleanup, + validate=cfg.validate, strip=cfg.strip, toplevel=cfg.toplevel, + only_entry=cfg.only_entry) + # deriving a non-toplevel job disables certain features + # XXX: should we keep track if any of these were set explicitly in the first place? + # see how PkgEval does that. + if !toplevel + optimize = false + libraries = false + cleanup = false + validate = false + end + CompilerConfig(target, params; kernel, entry_abi, name, always_inline, opt_level, + libraries, optimize, cleanup, validate, strip, toplevel, only_entry) +end function Base.show(io::IO, @nospecialize(cfg::CompilerConfig{T})) where {T} print(io, "CompilerConfig for ", T) @@ -131,6 +163,13 @@ function Base.hash(cfg::CompilerConfig, h::UInt) h = hash(cfg.entry_abi, h) h = hash(cfg.always_inline, h) h = hash(cfg.opt_level, h) + h = hash(cfg.libraries, h) + h = hash(cfg.optimize, h) + h = hash(cfg.cleanup, h) + h = hash(cfg.validate, h) + h = hash(cfg.strip, h) + h = hash(cfg.toplevel, h) + h = hash(cfg.only_entry, h) return h end @@ -144,16 +183,26 @@ using Core: MethodInstance # a specific invocation of the compiler, bundling everything needed to generate code +""" + CompilerJob(source::MethodInstance, config::CompilerConfig, [world=tls_world_age()]) + +Construct a `CompilerJob` that will be used to drive compilation for the given `source` and +`config` in a given `world`. 
+""" struct CompilerJob{T,P} source::MethodInstance config::CompilerConfig{T,P} world::UInt - CompilerJob(src::MethodInstance, cfg::CompilerConfig{T,P}, + CompilerJob(source::MethodInstance, config::CompilerConfig{T,P}, world=tls_world_age()) where {T,P} = - new{T,P}(src, cfg, world) + new{T,P}(source, config, world) end +# copy constructor +CompilerJob(job::CompilerJob; source=job.source, config=job.config, world=job.world) = + CompilerJob(source, config, world) + function Base.hash(job::CompilerJob, h::UInt) h = hash(job.source, h) h = hash(job.config, h) diff --git a/src/precompile.jl b/src/precompile.jl index 2921f24c..8f62451b 100644 --- a/src/precompile.jl +++ b/src/precompile.jl @@ -26,13 +26,13 @@ using PrecompileTools: @setup_workload, @compile_workload source = methodinstance(typeof(kernel), Tuple{}) target = NativeCompilerTarget() params = precompile_module.DummyCompilerParams() - config = CompilerConfig(target, params) + # XXX: on Windows, compiling the GPU runtime leaks GPU code in the native cache, + # so prevent building the runtime library (see JuliaGPU/GPUCompiler.jl#601) + config = CompilerConfig(target, params; libraries=false) job = CompilerJob(source, config) JuliaContext() do ctx - # XXX: on Windows, compiling the GPU runtime leaks GPU code in the native cache, - # so prevent building the runtime library (see JuliaGPU/GPUCompiler.jl#601) - GPUCompiler.compile(:asm, job; libraries=false) + GPUCompiler.compile(:asm, job) end end diff --git a/src/reflection.jl b/src/reflection.jl index 915e7af3..df1a0a43 100644 --- a/src/reflection.jl +++ b/src/reflection.jl @@ -186,8 +186,9 @@ See also: [`@device_code_llvm`](@ref), `InteractiveUtils.code_llvm` function code_llvm(io::IO, @nospecialize(job::CompilerJob); optimize::Bool=true, raw::Bool=false, debuginfo::Symbol=:default, dump_module::Bool=false, kwargs...) # NOTE: jl_dump_function_ir supports stripping metadata, so don't do it in the driver + config = CompilerConfig(job.config; validate=false, strip=false) str = JuliaContext() do ctx - ir, meta = compile(:llvm, job; optimize=optimize, strip=false, validate=false, kwargs...) 
+ ir, meta = compile(:llvm, CompilerJob(job; config)) ts_mod = ThreadSafeModule(ir) entry_fn = meta.entry GC.@preserve ts_mod entry_fn begin @@ -214,9 +215,11 @@ The following keyword arguments are supported: See also: [`@device_code_native`](@ref), `InteractiveUtils.code_native` """ -function code_native(io::IO, @nospecialize(job::CompilerJob); raw::Bool=false, dump_module::Bool=false) +function code_native(io::IO, @nospecialize(job::CompilerJob); + raw::Bool=false, dump_module::Bool=false) + config = CompilerConfig(job.config; strip=!raw, only_entry=!dump_module, validate=false) asm, meta = JuliaContext() do ctx - compile(:asm, job; strip=!raw, only_entry=!dump_module, validate=false) + compile(:asm, CompilerJob(job; config)) end highlight(io, asm, source_code(job.config.target)) end diff --git a/src/rtlib.jl b/src/rtlib.jl index 88f366b9..42faaebf 100644 --- a/src/rtlib.jl +++ b/src/rtlib.jl @@ -68,7 +68,7 @@ end function emit_function!(mod, config::CompilerConfig, f, method) tt = Base.to_tuple_type(method.types) source = generic_methodinstance(f, tt) - new_mod, meta = codegen(:llvm, CompilerJob(source, config); toplevel=false) + new_mod, meta = compile_unhooked(:llvm, CompilerJob(source, config)) ft = function_type(meta.entry) expected_ft = convert(LLVM.FunctionType, method) if return_type(ft) != return_type(expected_ft) @@ -99,7 +99,7 @@ function build_runtime(@nospecialize(job::CompilerJob)) # the compiler job passed into here identifies the job that requires the runtime. # derive a job that represents the runtime itself (notably with kernel=false). - config = CompilerConfig(job.config; kernel=false) + config = CompilerConfig(job.config; kernel=false, toplevel=false) for method in values(Runtime.methods) def = if isa(method.def, Symbol) diff --git a/src/spirv.jl b/src/spirv.jl index 45212173..7e27c346 100644 --- a/src/spirv.jl +++ b/src/spirv.jl @@ -184,8 +184,9 @@ end # reimplementation that uses `spirv-dis`, giving much more pleasant output function code_native(io::IO, job::CompilerJob{SPIRVCompilerTarget}; raw::Bool=false, dump_module::Bool=false) + config = CompilerConfig(job.config; strip=!raw, only_entry=!dump_module, validate=false) obj, _ = JuliaContext() do ctx - compile(:obj, job; strip=!raw, only_entry=!dump_module, validate=false) + compile(:obj, CompilerJob(job; config)) end mktemp() do input_path, input_io write(input_io, obj) diff --git a/test/helpers/bpf.jl b/test/helpers/bpf.jl index 49d9d6e4..d66b6b48 100644 --- a/test/helpers/bpf.jl +++ b/test/helpers/bpf.jl @@ -6,12 +6,12 @@ import ..TestRuntime struct CompilerParams <: AbstractCompilerParams end GPUCompiler.runtime_module(::CompilerJob{<:Any,CompilerParams}) = TestRuntime -function create_job(@nospecialize(func), @nospecialize(types); - kernel::Bool=false, always_inline=false, kwargs...) +function create_job(@nospecialize(func), @nospecialize(types); kwargs...) + config_kwargs, kwargs = split_kwargs(kwargs, GPUCompiler.CONFIG_KWARGS) source = methodinstance(typeof(func), Base.to_tuple_type(types), Base.get_world_counter()) target = BPFCompilerTarget() params = CompilerParams() - config = CompilerConfig(target, params; kernel, always_inline) + config = CompilerConfig(target, params; kernel=false, config_kwargs...)
CompilerJob(source, config), kwargs end diff --git a/test/helpers/gcn.jl b/test/helpers/gcn.jl index 2cb371e9..f7f54f85 100644 --- a/test/helpers/gcn.jl +++ b/test/helpers/gcn.jl @@ -6,12 +6,12 @@ import ..TestRuntime struct CompilerParams <: AbstractCompilerParams end GPUCompiler.runtime_module(::CompilerJob{<:Any,CompilerParams}) = TestRuntime -function create_job(@nospecialize(func), @nospecialize(types); - kernel::Bool=false, always_inline=false, kwargs...) +function create_job(@nospecialize(func), @nospecialize(types); kwargs...) + config_kwargs, kwargs = split_kwargs(kwargs, GPUCompiler.CONFIG_KWARGS) source = methodinstance(typeof(func), Base.to_tuple_type(types), Base.get_world_counter()) target = GCNCompilerTarget(dev_isa="gfx900") params = CompilerParams() - config = CompilerConfig(target, params; kernel, always_inline) + config = CompilerConfig(target, params; kernel=false, config_kwargs...) CompilerJob(source, config), kwargs end diff --git a/test/helpers/metal.jl b/test/helpers/metal.jl index c45ba4c4..d46f9a89 100644 --- a/test/helpers/metal.jl +++ b/test/helpers/metal.jl @@ -6,12 +6,12 @@ import ..TestRuntime struct CompilerParams <: AbstractCompilerParams end GPUCompiler.runtime_module(::CompilerJob{<:Any,CompilerParams}) = TestRuntime -function create_job(@nospecialize(func), @nospecialize(types); - kernel::Bool=false, always_inline=false, kwargs...) +function create_job(@nospecialize(func), @nospecialize(types); kwargs...) + config_kwargs, kwargs = split_kwargs(kwargs, GPUCompiler.CONFIG_KWARGS) source = methodinstance(typeof(func), Base.to_tuple_type(types), Base.get_world_counter()) target = MetalCompilerTarget(; macos=v"12.2", metal=v"3.0", air=v"3.0") params = CompilerParams() - config = CompilerConfig(target, params; kernel, always_inline) + config = CompilerConfig(target, params; kernel=false, config_kwargs...) CompilerJob(source, config), kwargs end diff --git a/test/helpers/native.jl b/test/helpers/native.jl index c1c39ba0..d53ff172 100644 --- a/test/helpers/native.jl +++ b/test/helpers/native.jl @@ -20,13 +20,13 @@ GPUCompiler.runtime_module(::NativeCompilerJob) = TestRuntime GPUCompiler.method_table(@nospecialize(job::NativeCompilerJob)) = job.config.params.method_table GPUCompiler.can_safepoint(@nospecialize(job::NativeCompilerJob)) = job.config.params.entry_safepoint -function create_job(@nospecialize(func), @nospecialize(types); kernel::Bool=false, - entry_abi=:specfunc, entry_safepoint::Bool=false, always_inline=false, - method_table=test_method_table, kwargs...) +function create_job(@nospecialize(func), @nospecialize(types); + entry_safepoint::Bool=false, method_table=test_method_table, kwargs...) + config_kwargs, kwargs = split_kwargs(kwargs, GPUCompiler.CONFIG_KWARGS) source = methodinstance(typeof(func), Base.to_tuple_type(types), Base.get_world_counter()) target = NativeCompilerTarget() params = CompilerParams(entry_safepoint, method_table) - config = CompilerConfig(target, params; kernel, entry_abi, always_inline) + config = CompilerConfig(target, params; kernel=false, config_kwargs...) CompilerJob(source, config), kwargs end @@ -71,7 +71,7 @@ const runtime_cache = Dict{Any, Any}() function compiler(job) JuliaContext() do ctx - GPUCompiler.compile(:asm, job, validate=false) + GPUCompiler.compile(:asm, job) end end @@ -81,7 +81,7 @@ end # simulates cached codegen function cached_execution(@nospecialize(func), @nospecialize(types); kwargs...) - job, kwargs = create_job(func, types; kwargs...) 
+ job, kwargs = create_job(func, types; validate=false, kwargs...) GPUCompiler.cached_compilation(runtime_cache, job.source, job.config, compiler, linker) end diff --git a/test/helpers/ptx.jl b/test/helpers/ptx.jl index a9f58871..5f8a3c48 100644 --- a/test/helpers/ptx.jl +++ b/test/helpers/ptx.jl @@ -35,15 +35,15 @@ module PTXTestRuntime end GPUCompiler.runtime_module(::PTXCompilerJob) = PTXTestRuntime -function create_job(@nospecialize(func), @nospecialize(types); kernel::Bool=false, - minthreads=nothing, maxthreads=nothing, blocks_per_sm=nothing, - maxregs=nothing, always_inline=false, kwargs...) +function create_job(@nospecialize(func), @nospecialize(types); + minthreads=nothing, maxthreads=nothing, + blocks_per_sm=nothing, maxregs=nothing, + kwargs...) + config_kwargs, kwargs = split_kwargs(kwargs, GPUCompiler.CONFIG_KWARGS) source = methodinstance(typeof(func), Base.to_tuple_type(types), Base.get_world_counter()) - target = PTXCompilerTarget(;cap=v"7.0", - minthreads, maxthreads, - blocks_per_sm, maxregs) + target = PTXCompilerTarget(; cap=v"7.0", minthreads, maxthreads, blocks_per_sm, maxregs) params = CompilerParams() - config = CompilerConfig(target, params; kernel, always_inline) + config = CompilerConfig(target, params; kernel=false, config_kwargs...) CompilerJob(source, config), kwargs end diff --git a/test/helpers/spirv.jl b/test/helpers/spirv.jl index 761cd495..73d030d1 100644 --- a/test/helpers/spirv.jl +++ b/test/helpers/spirv.jl @@ -7,14 +7,14 @@ struct CompilerParams <: AbstractCompilerParams end GPUCompiler.runtime_module(::CompilerJob{<:Any,CompilerParams}) = TestRuntime function create_job(@nospecialize(func), @nospecialize(types); - kernel::Bool=false, always_inline=false, - supports_fp16=true, supports_fp64=true, - backend::Symbol, kwargs...) + supports_fp16=true, supports_fp64=true, backend::Symbol, + kwargs...) + config_kwargs, kwargs = split_kwargs(kwargs, GPUCompiler.CONFIG_KWARGS) source = methodinstance(typeof(func), Base.to_tuple_type(types), Base.get_world_counter()) target = SPIRVCompilerTarget(; backend, validate=true, optimize=true, supports_fp16, supports_fp64) params = CompilerParams() - config = CompilerConfig(target, params; kernel, always_inline) + config = CompilerConfig(target, params; kernel=false, config_kwargs...) 
CompilerJob(source, config), kwargs end diff --git a/test/metal.jl b/test/metal.jl index db626435..ae854353 100644 --- a/test/metal.jl +++ b/test/metal.jl @@ -114,7 +114,7 @@ end return end - @test_throws_message(InvalidIRError, Metal.code_llvm(devnull, kernel2, Tuple{Core.LLVMPtr{Float64,1}}; validate=true)) do msg + @test_throws_message(InvalidIRError, Metal.code_execution(kernel2, Tuple{Core.LLVMPtr{Float64,1}})) do msg occursin("unsupported use of double value", msg) end end diff --git a/test/native.jl b/test/native.jl index 4772c49e..19a6297c 100644 --- a/test/native.jl +++ b/test/native.jl @@ -50,10 +50,10 @@ end @noinline inner(x) = x+1 foo(x) = sum(inner, fill(x, 10, 10)) - job, _ = Native.create_job(foo, (Float64,)) + job, _ = Native.create_job(foo, (Float64,); validate=false) JuliaContext() do ctx # shouldn't segfault - ir, meta = GPUCompiler.compile(:llvm, job; validate=false) + ir, meta = GPUCompiler.compile(:llvm, job) meth = only(methods(foo, (Float64,))) @@ -87,8 +87,10 @@ end invocations = Ref(0) function compiler(job) invocations[] += 1 - ir = sprint(io->GPUCompiler.code_llvm(io, job)) - return ir + JuliaContext() do ctx + ir, ir_meta = GPUCompiler.compile(:llvm, job) + string(ir) + end end linker(job, compiled) = compiled cache = Dict() diff --git a/test/native/precompile.jl b/test/native/precompile.jl index c2648701..6fe981a5 100644 --- a/test/native/precompile.jl +++ b/test/native/precompile.jl @@ -56,7 +56,7 @@ precompile_test_harness("Inference caching") do load_path GPUCompiler.enable_disk_cache!() @test GPUCompiler.disk_cache_enabled() == true - job, _ = NativeCompiler.Native.create_job(NativeBackend.kernel, (Vector{Int}, Int)) + job, _ = NativeCompiler.Native.create_job(NativeBackend.kernel, (Vector{Int}, Int); validate=false) @assert job.source == kernel_mi ci = GPUCompiler.ci_cache_lookup(GPUCompiler.ci_cache(job), job.source, job.world, job.world) @assert ci !== nothing diff --git a/test/spirv.jl b/test/spirv.jl index 2d7fb841..e14ccf77 100644 --- a/test/spirv.jl +++ b/test/spirv.jl @@ -48,28 +48,28 @@ end end ir = sprint(io->SPIRV.code_llvm(io, mod.kernel, Tuple{Ptr{Float16}, Float16}; - backend, validate=true)) + backend)) @test occursin("store half", ir) ir = sprint(io->SPIRV.code_llvm(io, mod.kernel, Tuple{Ptr{Float32}, Float32}; - backend, validate=true)) + backend)) @test occursin("store float", ir) ir = sprint(io->SPIRV.code_llvm(io, mod.kernel, Tuple{Ptr{Float64}, Float64}; - backend, validate=true)) + backend)) @test occursin("store double", ir) @test_throws_message(InvalidIRError, - SPIRV.code_llvm(devnull, mod.kernel, Tuple{Ptr{Float16}, Float16}; - backend, supports_fp16=false, validate=true)) do msg + SPIRV.code_execution(mod.kernel, Tuple{Ptr{Float16}, Float16}; + backend, supports_fp16=false)) do msg occursin("unsupported use of half value", msg) && occursin("[1] unsafe_store!", msg) && occursin("[2] kernel", msg) end @test_throws_message(InvalidIRError, - SPIRV.code_llvm(devnull, mod.kernel, Tuple{Ptr{Float64}, Float64}; - backend, supports_fp64=false, validate=true)) do msg + SPIRV.code_execution(mod.kernel, Tuple{Ptr{Float64}, Float64}; + backend, supports_fp64=false)) do msg occursin("unsupported use of double value", msg) && occursin("[1] unsafe_store!", msg) && occursin("[2] kernel", msg)
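
Migration note: the practical effect of this change is that per-compilation options such as `optimize`, `strip`, `validate`, `libraries` and `only_entry` move from keyword arguments on `compile`/`codegen` onto the `CompilerConfig`, with variants derived through the new copy constructors. A minimal sketch of the migration, assuming a native target roughly like the one set up in `test/helpers/native.jl`; the `SketchParams` type and the `libraries=false` shortcut are illustrative assumptions, and a real target would typically also define `GPUCompiler.runtime_module` and related hooks:

```julia
using GPUCompiler, LLVM

# trivial kernel and throwaway params type, purely for illustration
kernel() = nothing
struct SketchParams <: GPUCompiler.AbstractCompilerParams end

source = GPUCompiler.methodinstance(typeof(kernel), Tuple{})
target = GPUCompiler.NativeCompilerTarget()

# before: GPUCompiler.compile(:asm, job; strip=true, validate=false, libraries=false)
# after: the same options are carried by the (immutable) CompilerConfig
config = CompilerConfig(target, SketchParams();
                        kernel=false, strip=true, validate=false, libraries=false)
job = CompilerJob(source, config)

asm, meta = JuliaContext() do ctx
    GPUCompiler.compile(:asm, job)
end

# tweaked variants are derived via the copy constructors instead of extra kwargs
debug_config = CompilerConfig(config; strip=false)
debug_job = CompilerJob(job; config=debug_config)
```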