diff --git a/.github/workflows/ci-integration.yml b/.github/workflows/ci-integration.yml index 544b0a4..617972e 100644 --- a/.github/workflows/ci-integration.yml +++ b/.github/workflows/ci-integration.yml @@ -18,9 +18,9 @@ jobs: fail-fast: false matrix: version: - - '1.8' - '1.9' - - '1.10.0-rc1' + - '1.10' + - '1.11' os: - ubuntu-latest - macOS-latest diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 499223c..1d1718a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,9 +18,9 @@ jobs: fail-fast: false matrix: version: - - '1.8' - '1.9' - '1.10' + - '1.11' os: - ubuntu-latest - macOS-latest diff --git a/Project.toml b/Project.toml index b4d7d40..607bd31 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "StaticCompiler" uuid = "81625895-6c0f-48fc-b932-11a18313743c" authors = ["Tom Short and contributors"] -version = "0.7.2" +version = "0.7.3" [deps] @@ -18,11 +18,11 @@ StaticTools = "86c06d3c-3f03-46de-9781-57580aa96d0a" [compat] CodeInfoTools = "0.3" -GPUCompiler = "0.21, 0.22, 0.23, 0.24, 0.25, 0.26" -LLVM = "6" +GPUCompiler = "0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 1.5, 1" +LLVM = "6, 7, 8, 9" MacroTools = "0.5" StaticTools = "0.8" -julia = "1.8, 1.9" +julia = "1.8, 1.9, 1.10, 1.11" [extras] Formatting = "59287772-0a20-5a39-b81b-1366585eb4c0" diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl index 90e3404..dce0ca7 100644 --- a/src/StaticCompiler.jl +++ b/src/StaticCompiler.jl @@ -1,3 +1,5 @@ +#__precompile__(false) + module StaticCompiler using InteractiveUtils using GPUCompiler: GPUCompiler @@ -19,10 +21,10 @@ export static_code_llvm, static_code_typed, static_llvm_module, static_code_nati export @device_override, @print_and_throw export StaticTarget +include("quirks.jl") include("interpreter.jl") include("target.jl") include("pointer_warning.jl") -include("quirks.jl") include("dllexport.jl") fix_name(f::Function) = fix_name(string(nameof(f))) @@ -450,9 +452,9 @@ function static_llvm_module(f, tt, name=fix_name(f); demangle=true, target::Stat if !demangle name = "julia_"*name end - job, kwargs = static_job(f, tt; name, target, kwargs...) + job, kwargs = static_job(f, tt; name, target, strip=true, only_entry=false, validate=false, libraries=false, kwargs...) m = GPUCompiler.JuliaContext() do context - m, _ = GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false, libraries=false) + m, _ = GPUCompiler.compile(:llvm, job; kwargs...) locate_pointers_and_runtime_calls(m) m end @@ -467,8 +469,8 @@ function static_llvm_module(funcs::Union{Array,Tuple}; demangle=true, target::St if !demangle name_f = "julia_"*name_f end - job, kwargs = static_job(f, tt; name = name_f, target, kwargs...) - mod,_ = GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false, libraries=false) + job, kwargs = static_job(f, tt; name = name_f, target, strip=true, only_entry=false, validate=false, libraries=false, kwargs...) + mod,_ = GPUCompiler.compile(:llvm, job; kwargs...) if length(funcs) > 1 for func in funcs[2:end] f,tt = func @@ -476,8 +478,8 @@ function static_llvm_module(funcs::Union{Array,Tuple}; demangle=true, target::St if !demangle name_f = "julia_"*name_f end - job, kwargs = static_job(f, tt; name = name_f, target, kwargs...) - tmod,_ = GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false, libraries=false) + job, kwargs = static_job(f, tt; name = name_f, target, strip=true, only_entry=false, validate=false, libraries=false, kwargs...) + tmod,_ = GPUCompiler.compile(:llvm, job; kwargs...) link!(mod,tmod) end end @@ -587,8 +589,16 @@ function generate_obj(funcs::Union{Array,Tuple}, path::String = tempname(), file obj_path = joinpath(path, "$filenamebase.o") obj = GPUCompiler.JuliaContext() do ctx fakejob, _ = static_job(f, tt; target, kwargs...) + @static if VERSION < v"1.9" obj, _ = GPUCompiler.emit_asm(fakejob, mod; strip=strip_asm, validate=false, format=LLVM.API.LLVMObjectFile) - obj + else + @static if pkgversion(GPUCompiler) < v"1.3.0" + obj, _ = GPUCompiler.emit_asm(fakejob, mod; strip=strip_asm, validate=false, format=LLVM.API.LLVMObjectFile) + else + obj, _ = GPUCompiler.emit_asm(fakejob, mod, LLVM.API.LLVMObjectFile) + end + end + obj end open(obj_path, "w") do io write(io, obj) diff --git a/src/interpreter.jl b/src/interpreter.jl index 344cc53..e1d5656 100644 --- a/src/interpreter.jl +++ b/src/interpreter.jl @@ -1,61 +1,68 @@ ## interpreter using Core.Compiler: - AbstractInterpreter, InferenceResult, InferenceParams, InferenceState, MethodInstance, OptimizationParams, WorldView, get_world_counter + AbstractInterpreter, InferenceResult, InferenceParams, InferenceState, MethodInstance, OptimizationParams, WorldView using GPUCompiler: - @safe_debug, AbstractCompilerParams, CodeCache, CompilerJob, methodinstance + @safe_debug, AbstractCompilerParams, CompilerJob, methodinstance, CodeInstance, inference_params, optimization_params, get_inference_world using CodeInfoTools using CodeInfoTools: resolve + +const HAS_INTEGRATED_CACHE = GPUCompiler.HAS_INTEGRATED_CACHE +@static if HAS_INTEGRATED_CACHE + const CodeCache = Nothing + +else + using GPUCompiler: CodeCache +end + +# https://github.com/JuliaGPU/GPUCompiler.jl/src/jlgen.jl8#L322 +# as from struct GPUInterpreter <: CC.AbstractInterpreter struct StaticInterpreter <: AbstractInterpreter - global_cache::CodeCache + # The world age we're working inside of + world::UInt method_table::Union{Nothing,Core.MethodTable} + @static if HAS_INTEGRATED_CACHE + token::Any + else + code_cache::CodeCache # global cache + end + # Cache of inference results for this particular interpreter local_cache::Vector{InferenceResult} - # The world age we're working inside of - world::UInt # Parameters for inference and optimization inf_params::InferenceParams opt_params::OptimizationParams - - function StaticInterpreter(cache::CodeCache, mt::Union{Nothing,Core.MethodTable}, world::UInt, ip::InferenceParams, op::OptimizationParams) + # token_or_cache = token::Any, code_cache::CodeCache + function StaticInterpreter(world::UInt, mt::Union{Nothing,Core.MethodTable}, token_or_cache, ip::InferenceParams, op::OptimizationParams) @assert world <= Base.get_world_counter() - - return new( - cache, - mt, - - # Initially empty cache - Vector{InferenceResult}(), - - # world age counter - world, - - # parameters for inference and optimization - ip, - op - ) + # mt = get_method_table_view(world, mt) + local_cache = Vector{Core.Compiler.InferenceResult}() # Initially empty cache + return new(world, mt, token_or_cache, local_cache, ip, op) end end - Core.Compiler.InferenceParams(interp::StaticInterpreter) = interp.inf_params Core.Compiler.OptimizationParams(interp::StaticInterpreter) = interp.opt_params -Core.Compiler.get_world_counter(interp::StaticInterpreter) = interp.world +# Core.Compiler.get_world_counter(interp::StaticInterpreter) = interp.world +GPUCompiler.get_inference_world(interp::StaticInterpreter) = interp.world Core.Compiler.get_inference_cache(interp::StaticInterpreter) = interp.local_cache -Core.Compiler.code_cache(interp::StaticInterpreter) = WorldView(interp.global_cache, interp.world) +@static if HAS_INTEGRATED_CACHE + Core.Compiler.cache_owner(interp::StaticInterpreter) = interp.token +else + Core.Compiler.code_cache(interp::StaticInterpreter) = WorldView(interp.code_cache, interp.world) +end # No need to do any locking since we're not putting our results into the runtime cache Core.Compiler.lock_mi_inference(interp::StaticInterpreter, mi::MethodInstance) = nothing Core.Compiler.unlock_mi_inference(interp::StaticInterpreter, mi::MethodInstance) = nothing function Core.Compiler.add_remark!(interp::StaticInterpreter, sv::InferenceState, msg) - @safe_debug "Inference remark during static compilation of $(sv.linfo): $msg" + @safe_debug "Inference remark during static compilation of $(sv.linfo): $msg" end - ##### ##### Pre-inference ##### @@ -77,16 +84,19 @@ function custom_pass!(interp::StaticInterpreter, result::InferenceResult, mi::Co end function Core.Compiler.InferenceState(result::InferenceResult, cache::Symbol, interp::StaticInterpreter) - world = get_world_counter(interp) - src = @static if VERSION >= v"1.10.0-DEV.873" + world = get_inference_world(interp) + src = @static if VERSION >= v"1.10.0-DEV.873" Core.Compiler.retrieve_code_info(result.linfo, world) else - Core.Compiler.retrieve_code_info(result.linfo) + Core.Compiler.retrieve_code_info(result.linfo) end mi = result.linfo src = custom_pass!(interp, result, mi, src) - src === nothing && return nothing - Core.Compiler.validate_code_in_debug_mode(result.linfo, src, "lowered") + src === nothing && return @static if VERSION < v"1.11" + Core.Compiler.validate_code_in_debug_mode(result.linfo, src, "lowered") + else + Core.Compiler.maybe_validate_code(result.linfo, src, "lowered") + end return InferenceState(result, src, cache, interp) end @@ -95,7 +105,6 @@ Core.Compiler.may_compress(interp::StaticInterpreter) = true Core.Compiler.may_discard_trees(interp::StaticInterpreter) = true Core.Compiler.verbose_stmt_info(interp::StaticInterpreter) = false - if isdefined(Base.Experimental, Symbol("@overlay")) using Core.Compiler: OverlayMethodTable if v"1.8-beta2" <= VERSION < v"1.9-" || VERSION >= v"1.9.0-DEV.120" @@ -112,13 +121,13 @@ end # semi-concrete interepretation is broken with overlays (JuliaLang/julia#47349) @static if VERSION >= v"1.9.0-DEV.1248" -function Core.Compiler.concrete_eval_eligible(interp::StaticInterpreter, - @nospecialize(f), result::Core.Compiler.MethodCallResult, arginfo::Core.Compiler.ArgInfo) - ret = @invoke Core.Compiler.concrete_eval_eligible(interp::AbstractInterpreter, - f::Any, result::Core.Compiler.MethodCallResult, arginfo::Core.Compiler.ArgInfo) - ret === false && return nothing - return ret -end + function Core.Compiler.concrete_eval_eligible(interp::StaticInterpreter, + @nospecialize(f), result::Core.Compiler.MethodCallResult, arginfo::Core.Compiler.ArgInfo) + ret = @invoke Core.Compiler.concrete_eval_eligible(interp::AbstractInterpreter, + f::Any, result::Core.Compiler.MethodCallResult, arginfo::Core.Compiler.ArgInfo) + ret === false && return nothing + return ret + end end struct StaticCompilerParams <: AbstractCompilerParams @@ -127,8 +136,9 @@ struct StaticCompilerParams <: AbstractCompilerParams cache::CodeCache end -function StaticCompilerParams(; opt = false, - optlevel = Base.JLOptions().opt_level, - cache = CodeCache()) +function StaticCompilerParams(; opt=false, + optlevel=Base.JLOptions().opt_level, + cache=CodeCache() +) return StaticCompilerParams(opt, optlevel, cache) end diff --git a/src/pointer_warning.jl b/src/pointer_warning.jl index 9f8f30c..11a3881 100644 --- a/src/pointer_warning.jl +++ b/src/pointer_warning.jl @@ -30,8 +30,9 @@ function locate_pointers_and_runtime_calls(mod) end end if warned + lines = split(string(func),"\n") @warn("LLVM function generated warnings due to raw pointers embedded in the code. This will likely cause errors or undefined behaviour.", - func = func) + func = join(lines[1:min(20, end)], "\n")) # just print the first 20 lines end end end diff --git a/src/quirks.jl b/src/quirks.jl index f279d94..1b9480a 100644 --- a/src/quirks.jl +++ b/src/quirks.jl @@ -1,10 +1,41 @@ -libcexit(x::Int32) = @symbolcall exit(x::Int32)::Nothing +@static if isdefined(Base.Experimental, Symbol("@overlay")) + Base.Experimental.@MethodTable(method_table) + Base.Experimental.@MethodTable(empty_table) +else + const method_table = nothing +end + +""" +```julia +@device_override old_bad_method(arg1::Type1, arg2::Type2) = new_good_method(arg1, arg2) +``` +Override a non-static-compilable method (e.g. `old_bad_method(::Type1, ::Type2)`) +with a more compileable replacement. +### Examples +``` +@device_override @noinline Core.throw_inexacterror(f::Symbol, ::Type{T}, val) where {T} = + @print_and_throw c"Inexact conversion" +``` +""" +macro device_override(ex) + ex = macroexpand(__module__, ex) + if Meta.isexpr(ex, :call) + @show ex = eval(ex) + error() + end + code = quote + $Base.Experimental.@overlay($StaticCompiler.method_table, $ex) + end + return esc(code) +end + macro print_and_throw(err) quote - println($err) + printf($err) libcexit(Int32(1)) end end +libcexit(x::Int32) = @symbolcall exit(x::Int32)::Nothing # math.jl @device_override @noinline Base.Math.throw_complex_domainerror(f::Symbol, x) = @@ -37,9 +68,12 @@ end @device_override @noinline Core.throw_inexacterror(f::Symbol, ::Type{T}, val) where {T} = @print_and_throw c"Inexact conversion" -# abstractarray.jl -@device_override @noinline Base.throw_boundserror(A, I) = - @print_and_throw c"Out-of-bounds array access" +# abstractarray.jl +# Base.throw_boundserror is removed since v1.11 +if VERSION < v"1.11" + @device_override @noinline Base.throw_boundserror(A, I) = + @print_and_throw c"Out-of-bounds array access" +end # trig.jl @device_override @noinline Base.Math.sincos_domain_error(x) = diff --git a/src/target.jl b/src/target.jl index 5faec0d..6ee5378 100644 --- a/src/target.jl +++ b/src/target.jl @@ -1,9 +1,3 @@ -@static if isdefined(Base.Experimental, Symbol("@overlay")) - Base.Experimental.@MethodTable(method_table) -else - const method_table = nothing -end - """ ```julia StaticTarget() # Native target @@ -54,30 +48,6 @@ set_compiler!(target::StaticTarget, compiler::String) = (target.compiler = compi set_runtime!(target::StaticTarget, julia_runtime::Bool) = (target.julia_runtime = julia_runtime) -""" -```julia -@device_override old_bad_method(arg1::Type1, arg2::Type2) = new_good_method(arg1, arg2) -``` -Override a non-static-compilable method (e.g. `old_bad_method(::Type1, ::Type2)`) -with a more compileable replacement. -### Examples -``` -@device_override @noinline Core.throw_inexacterror(f::Symbol, ::Type{T}, val) where {T} = - @print_and_throw c"Inexact conversion" -``` -""" -macro device_override(ex) - ex = macroexpand(__module__, ex) - if Meta.isexpr(ex, :call) - @show ex = eval(ex) - error() - end - code = quote - $Base.Experimental.@overlay($StaticCompiler.method_table, $ex) - end - return esc(code) -end - # Default to native struct StaticCompilerTarget{MT} <: GPUCompiler.AbstractCompilerTarget triple::String @@ -121,27 +91,17 @@ GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{<:StaticCompilerTarget, Stati GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{<:StaticCompilerTarget}) = true GPUCompiler.uses_julia_runtime(job::GPUCompiler.CompilerJob{<:StaticCompilerTarget}) = job.config.target.julia_runtime -GPUCompiler.get_interpreter(job::GPUCompiler.CompilerJob{<:StaticCompilerTarget, StaticCompilerParams}) = - StaticInterpreter(job.config.params.cache, GPUCompiler.method_table(job), job.world, - GPUCompiler.inference_params(job), GPUCompiler.optimization_params(job)) -GPUCompiler.ci_cache(job::GPUCompiler.CompilerJob{<:StaticCompilerTarget, StaticCompilerParams}) = job.config.params.cache +@static if HAS_INTEGRATED_CACHE + GPUCompiler.get_interpreter(job::GPUCompiler.CompilerJob{<:StaticCompilerTarget, StaticCompilerParams}) = + StaticInterpreter(job.world, GPUCompiler.method_table(job), GPUCompiler.ci_cache_token(job), inference_params(job), optimization_params(job)) +else + GPUCompiler.ci_cache(job::GPUCompiler.CompilerJob{<:StaticCompilerTarget, StaticCompilerParams}) = job.config.params.cache + GPUCompiler.get_interpreter(job::GPUCompiler.CompilerJob{<:StaticCompilerTarget, StaticCompilerParams}) = + StaticInterpreter(job.world, GPUCompiler.method_table(job), job.config.params.cache, inference_params(job), optimization_params(job)) +end GPUCompiler.method_table(@nospecialize(job::GPUCompiler.CompilerJob{<:StaticCompilerTarget})) = job.config.target.method_table -function static_job(@nospecialize(func::Function), @nospecialize(types::Type); - name = fix_name(func), - kernel::Bool = false, - target::StaticTarget = StaticTarget(), - method_table=method_table, - kwargs... - ) - source = methodinstance(typeof(func), Base.to_tuple_type(types)) - tm = target.tm - gputarget = StaticCompilerTarget(LLVM.triple(tm), LLVM.cpu(tm), LLVM.features(tm), target.julia_runtime, method_table) - params = StaticCompilerParams() - config = GPUCompiler.CompilerConfig(gputarget, params, name = name, kernel = kernel) - StaticCompiler.CompilerJob(source, config), kwargs -end function static_job(@nospecialize(func), @nospecialize(types); name = fix_name(func), kernel::Bool = false, @@ -153,6 +113,11 @@ function static_job(@nospecialize(func), @nospecialize(types); tm = target.tm gputarget = StaticCompilerTarget(LLVM.triple(tm), LLVM.cpu(tm), LLVM.features(tm), target.julia_runtime, method_table) params = StaticCompilerParams() - config = GPUCompiler.CompilerConfig(gputarget, params, name = name, kernel = kernel) - StaticCompiler.CompilerJob(source, config), kwargs + @static if pkgversion(GPUCompiler) < v"1" + config = GPUCompiler.CompilerConfig(gputarget, params; name = name, kernel = kernel) + return StaticCompiler.CompilerJob(source, config), kwargs + else + config = GPUCompiler.CompilerConfig(gputarget, params; name = name, kernel = kernel, kwargs...) + return StaticCompiler.CompilerJob(source, config), Dict{}() + end end \ No newline at end of file diff --git a/test/Project.toml b/test/Project.toml index a36e208..96e84ca 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -16,4 +16,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" Bumper = "8ce10254-0962-460f-a3d8-1f77fea1446e" [compat] -Bumper = "0.6" \ No newline at end of file +Bumper = "0.6" diff --git a/test/runtests.jl b/test/runtests.jl index 542659c..9fa6b17 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -6,18 +6,18 @@ using LoopVectorization using ManualMemory using Distributed using StaticTools -using StrideArraysCore +# using StrideArraysCore using MacroTools using LLD_jll using Bumper addprocs(1) -@everywhere using StaticCompiler, StrideArraysCore +# @everywhere using StaticCompiler, StrideArraysCore const GROUP = get(ENV, "GROUP", "All") if GROUP == "Core" || GROUP == "All" - include("testcore.jl") + include("testcore.jl") end if GROUP == "Integration" || GROUP == "All" diff --git a/test/scripts/loopvec_matrix.jl b/test/scripts/loopvec_matrix.jl index 7b19ce8..cf6b213 100644 --- a/test/scripts/loopvec_matrix.jl +++ b/test/scripts/loopvec_matrix.jl @@ -3,9 +3,11 @@ using StaticTools using LoopVectorization @inline function mul!(C::MallocArray, A::MallocArray, B::MallocArray) - @turbo for n ∈ indices((C,B), 2), m ∈ indices((C,A), 1) + #@turbo for n ∈ indices((C,B), 2), m ∈ indices((C,A), 1) + @turbo for n ∈ indices(C, 2), m ∈ indices(C, 1) Cmn = zero(eltype(C)) - for k ∈ indices((A,B), (2,1)) + # for k ∈ indices((A,B), (2,1)) + for k ∈ indices(A, 2) Cmn += A[m,k] * B[k,n] end C[m,n] = Cmn @@ -39,7 +41,8 @@ function loopvec_matrix(argc::Int, argv::Ptr{Ptr{UInt8}}) mul!(C, B, A) # Print to stdout - printf(C) + printf(c"C matric = \n") + print(C) # Also print to file printdlm(c"table.tsv", C, '\t') fwrite(c"table.b", C) diff --git a/test/scripts/loopvec_matrix_stack.jl b/test/scripts/loopvec_matrix_stack.jl index 5e0c90d..ec46a94 100644 --- a/test/scripts/loopvec_matrix_stack.jl +++ b/test/scripts/loopvec_matrix_stack.jl @@ -3,9 +3,12 @@ using StaticTools using LoopVectorization @inline function mul!(C::StackArray, A::StackArray, B::StackArray) - @turbo for n ∈ indices((C,B), 2), m ∈ indices((C,A), 1) + # error since Julia v1.11 + #@turbo for n ∈ indices((C,B), 2), m ∈ indices((C,A), 1) + @turbo for n ∈ indices(C, 2), m ∈ indices(C, 1) Cmn = zero(eltype(C)) - for k ∈ indices((A,B), (2,1)) + # for k ∈ indices((A,B), (2,1)) + for k ∈ indices(A, 2) Cmn += A[m,k] * B[k,n] end C[m,n] = Cmn @@ -38,6 +41,7 @@ function loopvec_matrix_stack() mul!(C, B, A) # Print to stdout + printf(c"C matric = \n") printf(C) # Also print to file fp = fopen(c"table.tsv",c"w") diff --git a/test/testcore.jl b/test/testcore.jl index 064f010..f77b082 100644 --- a/test/testcore.jl +++ b/test/testcore.jl @@ -1,7 +1,5 @@ workdir = tempdir() - - fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) # This needs to be defined globally due to https://github.com/JuliaLang/julia/issues/40990 @testset "Standalone Dylibs" begin @@ -10,7 +8,7 @@ fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) # This needs to be defined globall # fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) #Compile dylib - name = repr(fib) + name = string(nameof(fib)) # repr(fib) filepath = compile_shlib(fib, (Int,), workdir, name, demangle=true) @test occursin("fib.$(Libdl.dlext)", filepath) # Open dylib manually