diff --git a/test/Project.toml b/test/Project.toml index 7354409e..71311c11 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -2,8 +2,10 @@ Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" +IOCapture = "b5f81e59-6552-4d32-b1f0-c071b021bf89" InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" LLVM = "929cbde3-209d-540e-8aea-75f648917ca0" +LLVM_jll = "86de99a1-58d6-5da7-8064-bd56ce2e322c" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" diff --git a/test/bpf.jl b/test/bpf.jl index d94b0d32..a4b10f38 100644 --- a/test/bpf.jl +++ b/test/bpf.jl @@ -1,38 +1,68 @@ @testset "No-op" begin - kernel() = 0 + mod = @eval module $(gensym()) + kernel() = 0 + end - output = sprint(io->BPF.code_native(io, kernel, ())) - @test occursin("\tr0 = 0\n\texit", output) + @test @filecheck begin + check"CHECK-LABEL: julia_kernel_{{[0-9_]*}}:" + check"CHECK: r0 = 0" + check"CHECK-NEXT: exit" + BPF.code_native(mod.kernel, ()) + end end @testset "Return argument" begin - kernel(x) = x + mod = @eval module $(gensym()) + kernel(x) = x + end - output = sprint(io->BPF.code_native(io, kernel, (UInt64,))) - @test occursin("\tr0 = r1\n\texit", output) + @test @filecheck begin + check"CHECK-LABEL: julia_kernel_{{[0-9_]*}}:" + check"CHECK: r0 = r1" + check"CHECK-NEXT: exit" + BPF.code_native(mod.kernel, (UInt64,)) + end end @testset "Addition" begin - kernel(x) = x+1 + mod = @eval module $(gensym()) + kernel(x) = x+1 + end - output = sprint(io->BPF.code_native(io, kernel, (UInt64,))) - @test occursin("\tr0 = r1\n\tr0 += 1\n\texit", output) + @test @filecheck begin + check"CHECK-LABEL: julia_kernel_{{[0-9_]*}}:" + check"CHECK: r0 = r1" + check"CHECK-NEXT: r0 += 1" + check"CHECK-NEXT: exit" + BPF.code_native(mod.kernel, (UInt64,)) + end end @testset "Errors" begin - kernel(x) = fakefunc(x) + 
mod = @eval module $(gensym()) + kernel(x) = fakefunc(x) + end - @test_throws GPUCompiler.InvalidIRError BPF.code_execution(kernel, (UInt64,)) + @test_throws GPUCompiler.InvalidIRError BPF.code_execution(mod.kernel, (UInt64,)) end @testset "Function Pointers" begin @testset "valid" begin - goodcall(x) = Base.llvmcall("%2 = call i64 inttoptr (i64 3 to i64 (i64)*)(i64 %0)\nret i64 %2", Int, Tuple{Int}, x) - kernel(x) = goodcall(x) + mod = @eval module $(gensym()) + goodcall(x) = Base.llvmcall("%2 = call i64 inttoptr (i64 3 to i64 (i64)*)(i64 %0)\nret i64 %2", Int, Tuple{Int}, x) + kernel(x) = goodcall(x) + end - output = sprint(io->BPF.code_native(io, kernel, (Int,))) - @test occursin(r"\tcall .*\n\texit", output) + @test @filecheck begin + check"CHECK-LABEL: julia_kernel_{{[0-9_]*}}:" + check"CHECK: call" + check"CHECK-NEXT: exit" + BPF.code_native(mod.kernel, (Int,)) + end end + @testset "invalid" begin - badcall(x) = Base.llvmcall("%2 = call i64 inttoptr (i64 3000 to i64 (i64)*)(i64 %0)\nret i64 %2", Int, Tuple{Int}, x) - kernel(x) = badcall(x) + mod = @eval module $(gensym()) + badcall(x) = Base.llvmcall("%2 = call i64 inttoptr (i64 3000 to i64 (i64)*)(i64 %0)\nret i64 %2", Int, Tuple{Int}, x) + kernel(x) = badcall(x) + end - @test_throws GPUCompiler.InvalidIRError BPF.code_execution(kernel, (Int,)) + @test_throws GPUCompiler.InvalidIRError BPF.code_execution(mod.kernel, (Int,)) end end diff --git a/test/gcn.jl b/test/gcn.jl index c16bc5d4..0108d6a5 100644 --- a/test/gcn.jl +++ b/test/gcn.jl @@ -1,15 +1,24 @@ if :AMDGPU in LLVM.backends() + +# XXX: generic `sink` generates an instruction selection error +sink_gcn(i) = sink(i, Val(5)) + @testset "IR" begin @testset "kernel calling convention" begin - kernel() = return + mod = @eval module $(gensym()) + kernel() = return + end - ir = sprint(io->GCN.code_llvm(io, kernel, Tuple{}; dump_module=true)) - @test !occursin("amdgpu_kernel", ir) + @test @filecheck begin + check"CHECK-NOT: amdgpu_kernel" + 
GCN.code_llvm(mod.kernel, Tuple{}; dump_module=true) + end - ir = sprint(io->GCN.code_llvm(io, kernel, Tuple{}; - dump_module=true, kernel=true)) - @test occursin("amdgpu_kernel", ir) + @test @filecheck begin + check"CHECK: amdgpu_kernel" + GCN.code_llvm(mod.kernel, Tuple{}; dump_module=true, kernel=true) + end end end @@ -18,48 +27,62 @@ end @testset "assembly" begin @testset "skip scalar trap" begin - workitem_idx_x() = ccall("llvm.amdgcn.workitem.id.x", llvmcall, Int32, ()) - trap() = ccall("llvm.trap", llvmcall, Nothing, ()) - function kernel() - if workitem_idx_x() > 1 - trap() + mod = @eval module $(gensym()) + workitem_idx_x() = ccall("llvm.amdgcn.workitem.id.x", llvmcall, Int32, ()) + trap() = ccall("llvm.trap", llvmcall, Nothing, ()) + + function kernel() + if workitem_idx_x() > 1 + trap() + end + return end - return end - asm = sprint(io->GCN.code_native(io, kernel, Tuple{})) - @test occursin("s_trap 2", asm) - @test_skip occursin("s_cbranch_execz", asm) - if Base.libllvm_version < v"9" - @test_broken occursin("v_readfirstlane", asm) + @test @filecheck begin + check"CHECK-LABEL: {{(julia|j)_kernel_[0-9]+}}:" + check"CHECK: s_cbranch_exec" + check"CHECK: s_trap 2" + GCN.code_native(mod.kernel, Tuple{}) end end @testset "child functions" begin # we often test using @noinline child functions, so test whether these survive # (despite not having side-effects) - @noinline child(i) = sink_gcn(i) - function parent(i) - child(i) - return + mod = @eval module $(gensym()) + import ..sink_gcn + @noinline child(i) = sink_gcn(i) + function parent(i) + child(i) + return + end end - asm = sprint(io->GCN.code_native(io, parent, Tuple{Int64}; dump_module=true)) - @test occursin(r"s_add_u32.*(julia|j)_child_.*@rel32@", asm) - @test occursin(r"s_addc_u32.*(julia|j)_child_.*@rel32@", asm) + @test @filecheck begin + check"CHECK-LABEL: {{(julia|j)_parent_[0-9]+}}:" + check"CHECK: s_add_u32 {{.+}} {{(julia|j)_child_[0-9]+}}@rel32@" + check"CHECK: s_addc_u32 {{.+}} 
{{(julia|j)_child_[0-9]+}}@rel32@" + GCN.code_native(mod.parent, Tuple{Int64}; dump_module=true) + end end @testset "kernel functions" begin - @noinline nonentry(i) = sink_gcn(i) - function entry(i) - nonentry(i) - return + mod = @eval module $(gensym()) + import ..sink_gcn + @noinline nonentry(i) = sink_gcn(i) + function entry(i) + nonentry(i) + return + end end - asm = sprint(io->GCN.code_native(io, entry, Tuple{Int64}; dump_module=true, kernel=true)) - @test occursin(r"\.amdhsa_kernel \w*entry", asm) - @test !occursin(r"\.amdhsa_kernel \w*nonentry", asm) - @test occursin(r"\.type.*\w*nonentry\w*,@function", asm) + @test @filecheck begin + check"CHECK-NOT: .amdhsa_kernel {{(julia|j)_nonentry_[0-9]+}}" + check"CHECK: .type {{(julia|j)_nonentry_[0-9]+}},@function" + check"CHECK: .amdhsa_kernel _Z5entry5Int64" + GCN.code_native(mod.entry, Tuple{Int64}; dump_module=true, kernel=true) + end end @testset "child function reuse" begin @@ -67,8 +90,7 @@ end # the child only being present once mod = @eval module $(gensym()) - export child, parent1, parent2 - + import ..sink_gcn @noinline child(i) = sink_gcn(i) function parent1(i) child(i) @@ -80,11 +102,15 @@ end end end - asm = sprint(io->GCN.code_native(io, mod.parent1, Tuple{Int}; dump_module=true)) - @test occursin(r"\.type.*(julia|j)_[[:alnum:]_.]*child_\d*,@function", asm) + @test @filecheck begin + check"CHECK: .type {{(julia|j)_child_[0-9]+}},@function" + GCN.code_native(mod.parent1, Tuple{Int}; dump_module=true) + end - asm = sprint(io->GCN.code_native(io, mod.parent2, Tuple{Int}; dump_module=true)) - @test occursin(r"\.type.*(julia|j)_[[:alnum:]_.]*child_\d*,@function", asm) + @test @filecheck begin + check"CHECK: .type {{(julia|j)_child_[0-9]+}},@function" + GCN.code_native(mod.parent2, Tuple{Int}; dump_module=true) + end end @testset "child function reuse bis" begin @@ -92,8 +118,7 @@ end # in the case of two child functions mod = @eval module $(gensym()) - export parent1, parent2, child1, child2 - + import 
..sink_gcn @noinline child1(i) = sink_gcn(i) @noinline child2(i) = sink_gcn(i+1) function parent1(i) @@ -106,13 +131,17 @@ end end end - asm = sprint(io->GCN.code_native(io, mod.parent1, Tuple{Int}; dump_module=true)) - @test occursin(r"\.type.*(julia|j)_[[:alnum:]_.]*child1_\d*,@function", asm) - @test occursin(r"\.type.*(julia|j)_[[:alnum:]_.]*child2_\d*,@function", asm) + @test @filecheck begin + check"CHECK-DAG: .type {{(julia|j)_child1_[0-9]+}},@function" + check"CHECK-DAG: .type {{(julia|j)_child2_[0-9]+}},@function" + GCN.code_native(mod.parent1, Tuple{Int}; dump_module=true) + end - asm = sprint(io->GCN.code_native(io, mod.parent2, Tuple{Int}; dump_module=true)) - @test occursin(r"\.type.*(julia|j)_[[:alnum:]_.]*child1_\d*,@function", asm) - @test occursin(r"\.type.*(julia|j)_[[:alnum:]_.]*child2_\d*,@function", asm) + @test @filecheck begin + check"CHECK-DAG: .type {{(julia|j)_child1_[0-9]+}},@function" + check"CHECK-DAG: .type {{(julia|j)_child2_[0-9]+}},@function" + GCN.code_native(mod.parent2, Tuple{Int}; dump_module=true) + end end @testset "indirect sysimg function use" begin @@ -121,47 +150,57 @@ end # NOTE: Int32 to test for #49 - function kernel(out) - wid, lane = fldmod1(unsafe_load(out), Int32(32)) - unsafe_store!(out, wid) - return + mod = @eval module $(gensym()) + function kernel(out) + wid, lane = fldmod1(unsafe_load(out), Int32(32)) + unsafe_store!(out, wid) + return + end end - asm = sprint(io->GCN.code_native(io, kernel, Tuple{Ptr{Int32}})) - @test !occursin("jl_throw", asm) - @test !occursin("jl_invoke", asm) # forced recompilation should still not invoke + @test @filecheck begin + check"CHECK-LABEL: {{(julia|j)_kernel_[0-9]+}}:" + check"CHECK-NOT: jl_throw" + check"CHECK-NOT: jl_invoke" + GCN.code_native(mod.kernel, Tuple{Ptr{Int32}}) + end end @testset "LLVM intrinsics" begin # issue #13 (a): cannot select trunc - function kernel(x) - unsafe_trunc(Int, x) - return + mod = @eval module $(gensym()) + function kernel(x) + unsafe_trunc(Int, 
x) + return + end end - GCN.code_native(devnull, kernel, Tuple{Float64}) + GCN.code_native(devnull, mod.kernel, Tuple{Float64}) @test "We did not crash!" != "" end # FIXME: _ZNK4llvm14TargetLowering20scalarizeVectorStoreEPNS_11StoreSDNodeERNS_12SelectionDAGE false && @testset "exception arguments" begin - function kernel(a) - unsafe_store!(a, trunc(Int, unsafe_load(a))) - return + mod = @eval module $(gensym()) + function kernel(a) + unsafe_store!(a, trunc(Int, unsafe_load(a))) + return + end end - GCN.code_native(devnull, kernel, Tuple{Ptr{Float64}}) + GCN.code_native(devnull, mod.kernel, Tuple{Ptr{Float64}}) end # FIXME: in function julia_inner_18528 void (%jl_value_t addrspace(10)*): invalid addrspacecast false && @testset "GC and TLS lowering" begin mod = @eval module $(gensym()) + import ..sink_gcn mutable struct PleaseAllocate y::Csize_t end # common pattern in Julia 0.7: outlined throw to avoid a GC frame in the calling code @noinline function inner(x) - sink(x.y) + sink_gcn(x.y) nothing end @@ -171,12 +210,14 @@ false && @testset "GC and TLS lowering" begin end end - asm = sprint(io->GCN.code_native(io, mod.kernel, Tuple{Int})) - @test occursin("gpu_gc_pool_alloc", asm) - @test !occursin("julia.push_gc_frame", asm) - @test !occursin("julia.pop_gc_frame", asm) - @test !occursin("julia.get_gc_frame_slot", asm) - @test !occursin("julia.new_gc_frame", asm) + @test @filecheck begin + check"CHECK-NOT: jl_push_gc_frame" + check"CHECK-NOT: jl_pop_gc_frame" + check"CHECK-NOT: jl_get_gc_frame_slot" + check"CHECK-NOT: jl_new_gc_frame" + check"CHECK: gpu_gc_pool_alloc" + GCN.code_native(mod.kernel, Tuple{Int}) + end # make sure that we can still ellide allocations function ref_kernel(ptr, i) @@ -191,26 +232,31 @@ false && @testset "GC and TLS lowering" begin return nothing end - asm = sprint(io->GCN.code_native(io, ref_kernel, Tuple{Ptr{Int64}, Int})) - - - @test !occursin("gpu_gc_pool_alloc", asm) + @test @filecheck begin + check"CHECK-NOT: gpu_gc_pool_alloc" + 
GCN.code_native(ref_kernel, Tuple{Ptr{Int64}, Int}) + end end @testset "float boxes" begin - function kernel(a,b) - c = Int32(a) - # the conversion to Int32 may fail, in which case the input Float32 is boxed in order to - # pass it to the @nospecialize exception constructor. we should really avoid that (eg. - # by avoiding @nospecialize, or optimize the unused arguments away), but for now the box - # should just work. - unsafe_store!(b, c) - return - end - - ir = sprint(io->GCN.code_llvm(io, kernel, Tuple{Float32,Ptr{Float32}})) - @test occursin("jl_box_float32", ir) - GCN.code_native(devnull, kernel, Tuple{Float32,Ptr{Float32}}) + mod = @eval module $(gensym()) + function kernel(a,b) + c = Int32(a) + # the conversion to Int32 may fail, in which case the input Float32 is boxed in order to + # pass it to the @nospecialize exception constructor. we should really avoid that (eg. + # by avoiding @nospecialize, or optimize the unused arguments away), but for now the box + # should just work. + unsafe_store!(b, c) + return + end + end + + @test @filecheck begin + check"CHECK-LABEL: define void @{{(julia|j)_kernel_[0-9]+}}" + check"CHECK: jl_box_float32" + GCN.code_llvm(mod.kernel, Tuple{Float32,Ptr{Float32}}) + end + GCN.code_native(devnull, mod.kernel, Tuple{Float32,Ptr{Float32}}) end end diff --git a/test/helpers/bpf.jl b/test/helpers/bpf.jl index d66b6b48..15eaa121 100644 --- a/test/helpers/bpf.jl +++ b/test/helpers/bpf.jl @@ -25,6 +25,15 @@ function code_native(io::IO, @nospecialize(func), @nospecialize(types); kwargs.. GPUCompiler.code_native(io, job; kwargs...) end +# aliases without ::IO argument +for method in (:code_llvm, :code_native) + method = Symbol("$(method)") + @eval begin + $method(@nospecialize(func), @nospecialize(types); kwargs...) = + $method(stdout, func, types; kwargs...) + end +end + # simulates codegen for a kernel function: validates by default function code_execution(@nospecialize(func), @nospecialize(types); kwargs...) 
job, kwargs = create_job(func, types; kwargs...) diff --git a/test/helpers/gcn.jl b/test/helpers/gcn.jl index f7f54f85..c894fbd3 100644 --- a/test/helpers/gcn.jl +++ b/test/helpers/gcn.jl @@ -35,6 +35,15 @@ function code_native(io::IO, @nospecialize(func), @nospecialize(types); kwargs.. GPUCompiler.code_native(io, job; kwargs...) end +# aliases without ::IO argument +for method in (:code_warntype, :code_llvm, :code_native) + method = Symbol("$(method)") + @eval begin + $method(@nospecialize(func), @nospecialize(types); kwargs...) = + $method(stdout, func, types; kwargs...) + end +end + # simulates codegen for a kernel function: validates by default function code_execution(@nospecialize(func), @nospecialize(types); kwargs...) job, kwargs = create_job(func, types; kernel=true, kwargs...) diff --git a/test/helpers/metal.jl b/test/helpers/metal.jl index d46f9a89..41eb0fbe 100644 --- a/test/helpers/metal.jl +++ b/test/helpers/metal.jl @@ -35,6 +35,15 @@ function code_native(io::IO, @nospecialize(func), @nospecialize(types); kwargs.. GPUCompiler.code_native(io, job; kwargs...) end +# aliases without ::IO argument +for method in (:code_warntype, :code_llvm, :code_native) + method = Symbol("$(method)") + @eval begin + $method(@nospecialize(func), @nospecialize(types); kwargs...) = + $method(stdout, func, types; kwargs...) + end +end + # simulates codegen for a kernel function: validates by default function code_execution(@nospecialize(func), @nospecialize(types); kwargs...) job, kwargs = create_job(func, types; kernel=true, kwargs...) diff --git a/test/helpers/ptx.jl b/test/helpers/ptx.jl index 5f8a3c48..e82416bc 100644 --- a/test/helpers/ptx.jl +++ b/test/helpers/ptx.jl @@ -67,6 +67,15 @@ function code_native(io::IO, @nospecialize(func), @nospecialize(types); kwargs.. GPUCompiler.code_native(io, job; kwargs...) 
end +# aliases without ::IO argument +for method in (:code_warntype, :code_llvm, :code_native) + method = Symbol("$(method)") + @eval begin + $method(@nospecialize(func), @nospecialize(types); kwargs...) = + $method(stdout, func, types; kwargs...) + end +end + # simulates codegen for a kernel function: validates by default function code_execution(@nospecialize(func), @nospecialize(types); kwargs...) job, kwargs = create_job(func, types; kernel=true, kwargs...) diff --git a/test/helpers/spirv.jl b/test/helpers/spirv.jl index 73d030d1..0144cd6a 100644 --- a/test/helpers/spirv.jl +++ b/test/helpers/spirv.jl @@ -38,6 +38,15 @@ function code_native(io::IO, @nospecialize(func), @nospecialize(types); kwargs.. GPUCompiler.code_native(io, job; kwargs...) end +# aliases without ::IO argument +for method in (:code_warntype, :code_llvm, :code_native) + method = Symbol("$(method)") + @eval begin + $method(@nospecialize(func), @nospecialize(types); kwargs...) = + $method(stdout, func, types; kwargs...) + end +end + # simulates codegen for a kernel function: validates by default function code_execution(@nospecialize(func), @nospecialize(types); kwargs...) job, kwargs = create_job(func, types; kernel=true, kwargs...) 
diff --git a/test/helpers/test.jl b/test/helpers/test.jl index eb60c9a1..014ddb27 100644 --- a/test/helpers/test.jl +++ b/test/helpers/test.jl @@ -35,3 +35,121 @@ end ret i64 %value""" return :(Base.llvmcall($llvmcall_str, T, Tuple{T}, i)) end + +# filecheck utils + +module FileCheck + import LLVM_jll + import IOCapture + using GPUCompiler, LLVM + + export filecheck, @filecheck, @check_str + + global filecheck_path::String + function __init__() + global filecheck_path = joinpath(LLVM_jll.artifact_dir, "tools", "FileCheck") + end + + function filecheck_exe(; adjust_PATH::Bool=true, adjust_LIBPATH::Bool=true) + env = Base.invokelatest( + LLVM_jll.JLLWrappers.adjust_ENV!, + copy(ENV), + LLVM_jll.PATH[], + LLVM_jll.LIBPATH[], + adjust_PATH, + adjust_LIBPATH + ) + + return Cmd(Cmd([filecheck_path]); env) + end + + const julia_typed_pointers = JuliaContext() do ctx + supports_typed_pointers(ctx) + end + + function filecheck(f, input) + # FileCheck assumes that the input is available as a file + mktemp() do path, input_io + write(input_io, input) + close(input_io) + + # capture the output of `f` and write it into a temporary buffer + result = IOCapture.capture(rethrow=Union{}) do + f(input) + end + output_io = IOBuffer() + write(output_io, result.output) + println(output_io) + + if result.error + # if the function errored, also render the exception and backtrace + showerror(output_io, result.value, result.backtrace) + elseif result.value !== nothing + # also show the returned value; some APIs don't print + write(output_io, string(result.value)) + end + + # determine some useful prefixes for FileCheck + prefixes = ["CHECK", + "JULIA$(VERSION.major)_$(VERSION.minor)", + "LLVM$(Base.libllvm_version.major)"] + ## whether we use typed pointers or opaque pointers + if julia_typed_pointers + push!(prefixes, "TYPED") + else + push!(prefixes, "OPAQUE") + end + ## whether we pass pointers as integers or as actual pointers + if VERSION >= v"1.12.0-DEV.225" + push!(prefixes, 
"PTR_ABI") + else + push!(prefixes, "INTPTR_ABI") + end + + # now pass the collected output to FileCheck + seekstart(output_io) + filecheck_io = Pipe() + cmd = ```$(filecheck_exe()) + --color + --allow-unused-prefixes + --check-prefixes $(join(prefixes, ',')) + $path``` + proc = run(pipeline(ignorestatus(cmd); stdin=output_io, stdout=filecheck_io, stderr=filecheck_io); wait=false) + close(filecheck_io.in) + + # collect the output of FileCheck + reader = Threads.@spawn String(read(filecheck_io)) + Base.wait(proc) + log = strip(fetch(reader)) + + # error out if FileCheck did not succeed. + # otherwise, return true so that `@test @filecheck` works as expected. + if !success(proc) + error(log) + end + return true + end + end + + # collect checks used in the @filecheck block by piggybacking on macro expansion + const checks = String[] + macro check_str(str) + push!(checks, str) + nothing + end + + macro filecheck(ex) + ex = Base.macroexpand(__module__, ex) + if isempty(checks) + error("No checks provided within the @filecheck macro block") + end + check_str = join(checks, "\n") + empty!(checks) + + esc(quote + filecheck($check_str) do _ + $ex + end + end) + end +end diff --git a/test/metal.jl b/test/metal.jl index ae854353..951781f1 100644 --- a/test/metal.jl +++ b/test/metal.jl @@ -2,110 +2,151 @@ @testset "kernel functions" begin @testset "byref aggregates" begin - kernel(x) = return + mod = @eval module $(gensym()) + kernel(x) = return + end - ir = sprint(io->Metal.code_llvm(io, kernel, Tuple{Tuple{Int}})) - @test occursin(r"@\w*kernel\w*\(({ i64 }|\[1 x i64\])\*", ir) || - occursin(r"@\w*kernel\w*\(ptr", ir) + @test @filecheck begin + check"CHECK-LABEL: define void @{{(julia|j)_kernel_[0-9]+}}" + check"TYPED-SAME: ({{(\{ i64 \}|\[1 x i64\])}}*" + check"OPAQUE-SAME: (ptr" + Metal.code_llvm(mod.kernel, Tuple{Tuple{Int}}) + end # for kernels, every pointer argument needs to take an address space - ir = sprint(io->Metal.code_llvm(io, kernel, Tuple{Tuple{Int}}; 
kernel=true)) - @test occursin(r"@\w*kernel\w*\(({ i64 }|\[1 x i64\]) addrspace\(1\)\*", ir) || - occursin(r"@\w*kernel\w*\(ptr addrspace\(1\)", ir) + @test @filecheck begin + check"CHECK-LABEL: define void @_Z6kernel5TupleI5Int64E" + check"TYPED-SAME: ({{(\{ i64 \}|\[1 x i64\])}} addrspace(1)*" + check"OPAQUE-SAME: (ptr addrspace(1)" + Metal.code_llvm(mod.kernel, Tuple{Tuple{Int}}; kernel=true) + end end @testset "byref primitives" begin - kernel(x) = return + mod = @eval module $(gensym()) + kernel(x) = return + end - ir = sprint(io->Metal.code_llvm(io, kernel, Tuple{Int})) - @test occursin(r"@\w*kernel\w*\(i64 ", ir) + @test @filecheck begin + check"CHECK-LABEL: define void @{{(julia|j)_kernel_[0-9]+}}" + check"CHECK-SAME: (i64" + Metal.code_llvm(mod.kernel, Tuple{Int}) + end # for kernels, every pointer argument needs to take an address space - ir = sprint(io->Metal.code_llvm(io, kernel, Tuple{Int}; kernel=true)) - @test occursin(r"@\w*kernel\w*\(i64 addrspace\(1\)\*", ir) || - occursin(r"@\w*kernel\w*\(ptr addrspace\(1\)", ir) + @test @filecheck begin + check"CHECK-LABEL: define void @_Z6kernel5Int64" + check"TYPED-SAME: (i64 addrspace(1)*" + check"OPAQUE-SAME: (ptr addrspace(1)" + Metal.code_llvm(mod.kernel, Tuple{Int}; kernel=true) + end end @testset "module metadata" begin - kernel() = return + mod = @eval module $(gensym()) + kernel() = return + end - ir = sprint(io->Metal.code_llvm(io, kernel, Tuple{}; - dump_module=true, kernel=true)) - @test occursin("air.version", ir) - @test occursin("air.language_version", ir) - @test occursin("air.max_device_buffers", ir) + @test @filecheck begin + check"CHECK: air.version" + check"CHECK: air.language_version" + check"CHECK: air.max_device_buffers" + Metal.code_llvm(mod.kernel, Tuple{}; dump_module=true, kernel=true) + end end @testset "argument metadata" begin - kernel(x) = return + mod = @eval module $(gensym()) + kernel(x) = return + end - ir = sprint(io->Metal.code_llvm(io, kernel, Tuple{Int}; - 
dump_module=true, kernel=true)) - @test occursin("air.buffer", ir) + @test @filecheck begin + check"CHECK: air.buffer" + Metal.code_llvm(mod.kernel, Tuple{Int}; dump_module=true, kernel=true) + end # XXX: perform more exhaustive testing of argument passing metadata here, # or just defer to execution testing in Metal.jl? end @testset "input arguments" begin - function kernel(ptr) - idx = ccall("extern julia.air.thread_position_in_threadgroup.i32", llvmcall, UInt32, ()) + 1 - unsafe_store!(ptr, 42, idx) - return + mod = @eval module $(gensym()) + function kernel(ptr) + idx = ccall("extern julia.air.thread_position_in_threadgroup.i32", + llvmcall, UInt32, ()) + 1 + unsafe_store!(ptr, 42, idx) + return + end end - ir = sprint(io->Metal.code_llvm(io, kernel, Tuple{Core.LLVMPtr{Int,1}})) - @test occursin(r"@\w*kernel\w*\(.* addrspace\(1\)\* %.+\)", ir) || - occursin(r"@\w*kernel\w*\(ptr addrspace\(1\) %.+\)", ir) - @test occursin(r"call i32 @julia.air.thread_position_in_threadgroup.i32", ir) + @test @filecheck begin + check"CHECK-LABEL: define void @{{(julia|j)_kernel_[0-9]+}}" + check"TYPED-SAME: ({{.+}} addrspace(1)* %{{.+}})" + check"OPAQUE-SAME: (ptr addrspace(1) %{{.+}})" + check"CHECK: call i32 @julia.air.thread_position_in_threadgroup.i32" + Metal.code_llvm(mod.kernel, Tuple{Core.LLVMPtr{Int,1}}) + end - ir = sprint(io->Metal.code_llvm(io, kernel, Tuple{Core.LLVMPtr{Int,1}}; kernel=true)) - @test occursin(r"@\w*kernel\w*\(.* addrspace\(1\)\* %.+, i32 %thread_position_in_threadgroup\)", ir) || - occursin(r"@\w*kernel\w*\(ptr addrspace\(1\) %.+, i32 %thread_position_in_threadgroup\)", ir) - @test !occursin(r"call i32 @julia.air.thread_position_in_threadgroup.i32", ir) + @test @filecheck begin + check"CHECK-LABEL: define void @_Z6kernel7LLVMPtrI5Int64Li1EE" + check"TYPED-SAME: ({{.+}} addrspace(1)* %{{.+}}, i32 %thread_position_in_threadgroup)" + check"OPAQUE-SAME: (ptr addrspace(1) %{{.+}}, i32 %thread_position_in_threadgroup)" + check"CHECK-NOT: call i32 
@julia.air.thread_position_in_threadgroup.i32" + Metal.code_llvm(mod.kernel, Tuple{Core.LLVMPtr{Int,1}}; kernel=true) + end end @testset "vector intrinsics" begin - foo(x, y) = ccall("llvm.smax.v2i64", llvmcall, NTuple{2, VecElement{Int64}}, - (NTuple{2, VecElement{Int64}}, NTuple{2, VecElement{Int64}}), x, y) + mod = @eval module $(gensym()) + foo(x, y) = ccall("llvm.smax.v2i64", llvmcall, NTuple{2, VecElement{Int64}}, + (NTuple{2, VecElement{Int64}}, NTuple{2, VecElement{Int64}}), x, y) + end - ir = sprint(io->Metal.code_llvm(io, foo, (NTuple{2, VecElement{Int64}}, NTuple{2, VecElement{Int64}}))) - @test occursin("air.max.s.v2i64", ir) + @test @filecheck begin + check"CHECK-LABEL: define <2 x i64> @{{(julia|j)_foo_[0-9]+}}" + check"CHECK: air.max.s.v2i64" + Metal.code_llvm(mod.foo, (NTuple{2, VecElement{Int64}}, NTuple{2, VecElement{Int64}})) + end end @testset "unsupported type detection" begin - function kernel1(ptr) - buf = reinterpret(Ptr{Float32}, ptr) - val = unsafe_load(buf) - dval = Cdouble(val) - # ccall("extern metal_os_log", llvmcall, Nothing, (Float64,), dval) - Base.llvmcall((""" - declare void @llvm.va_start(i8*) - declare void @llvm.va_end(i8*) - declare void @air.os_log(i8*, i64) - - define void @metal_os_log(...) { - %1 = alloca i8* - %2 = bitcast i8** %1 to i8* - call void @llvm.va_start(i8* %2) - %3 = load i8*, i8** %1 - call void @air.os_log(i8* %3, i64 8) - call void @llvm.va_end(i8* %2) - ret void - } - - define void @entry(double %val) #0 { - call void (...) 
@metal_os_log(double %val) - ret void - } - - attributes #0 = { alwaysinline }""", "entry"), - Nothing, Tuple{Float64}, dval) - return + mod = @eval module $(gensym()) + function kernel(ptr) + buf = reinterpret(Ptr{Float32}, ptr) + val = unsafe_load(buf) + dval = Cdouble(val) + # ccall("extern metal_os_log", llvmcall, Nothing, (Float64,), dval) + Base.llvmcall((""" + declare void @llvm.va_start(i8*) + declare void @llvm.va_end(i8*) + declare void @air.os_log(i8*, i64) + + define void @metal_os_log(...) { + %1 = alloca i8* + %2 = bitcast i8** %1 to i8* + call void @llvm.va_start(i8* %2) + %3 = load i8*, i8** %1 + call void @air.os_log(i8* %3, i64 8) + call void @llvm.va_end(i8* %2) + ret void + } + + define void @entry(double %val) #0 { + call void (...) @metal_os_log(double %val) + ret void + } + + attributes #0 = { alwaysinline }""", "entry"), + Nothing, Tuple{Float64}, dval) + return + end end - ir = sprint(io->Metal.code_llvm(io, kernel1, Tuple{Core.LLVMPtr{Float32,1}}; validate=true)) - @test occursin("@metal_os_log", ir) + @test @filecheck begin + check"CHECK-LABEL: define void @{{(julia|j)_kernel_[0-9]+}}" + check"CHECK: @metal_os_log" + Metal.code_llvm(mod.kernel, Tuple{Core.LLVMPtr{Float32,1}}; validate=true) + end function kernel2(ptr) val = unsafe_load(ptr) @@ -114,7 +155,9 @@ end return end - @test_throws_message(InvalidIRError, Metal.code_execution(kernel2, Tuple{Core.LLVMPtr{Float64,1}})) do msg + @test_throws_message(InvalidIRError, + Metal.code_execution(kernel2, + Tuple{Core.LLVMPtr{Float64,1}})) do msg occursin("unsupported use of double value", msg) end end @@ -130,9 +173,12 @@ end end end - ir = sprint(io->Metal.code_llvm(io, mod.kernel, Tuple{Core.LLVMPtr{Float32,1}, Int}; - dump_module=true, kernel=true)) - @test occursin("addrspace(2) constant [2 x float]", ir) + @test @filecheck begin + check"CHECK: @{{.+}} ={{.*}} addrspace(2) constant [2 x float]" + check"CHECK: define void @_Z6kernel7LLVMPtrI7Float32Li1EE5Int64" + 
Metal.code_llvm(mod.kernel, Tuple{Core.LLVMPtr{Float32,1}, Int}; + dump_module=true, kernel=true) + end end end diff --git a/test/native.jl b/test/native.jl index 18217613..1b0757ab 100644 --- a/test/native.jl +++ b/test/native.jl @@ -6,82 +6,102 @@ ci, rt = only(GPUCompiler.code_typed(job)) @test rt === Int - ir = sprint(io->GPUCompiler.code_warntype(io, job)) - @test contains(ir, "MethodInstance for identity") + @test @filecheck begin + check"CHECK: MethodInstance for identity" + GPUCompiler.code_warntype(job) + end - ir = sprint(io->GPUCompiler.code_llvm(io, job)) - @test contains(ir, r"(julia|j)_identity") + @test @filecheck begin + check"CHECK: @{{(julia|j)_identity_[0-9]+}}" + GPUCompiler.code_llvm(job) + end - asm = sprint(io->GPUCompiler.code_native(io, job)) - @test contains(asm, r"(julia|j)_identity") + @test @filecheck begin + check"CHECK: @{{(julia|j)_identity_[0-9]+}}" + GPUCompiler.code_native(job) + end end @testset "compilation" begin @testset "callable structs" begin - struct MyCallable end - (::MyCallable)(a, b) = a+b + mod = @eval module $(gensym()) + struct MyCallable end + (::MyCallable)(a, b) = a+b + end - (ci, rt) = Native.code_typed(MyCallable(), (Int, Int), kernel=false)[1] - @test ci.slottypes[1] == Core.Compiler.Const(MyCallable()) + (ci, rt) = Native.code_typed(mod.MyCallable(), (Int, Int), kernel=false)[1] + @test ci.slottypes[1] == Core.Compiler.Const(mod.MyCallable()) end @testset "compilation database" begin - @noinline inner(x) = x+1 - function outer(x) - return inner(x) + mod = @eval module $(gensym()) + @noinline inner(x) = x+1 + function outer(x) + return inner(x) + end end - job, _ = Native.create_job(outer, (Int,)) + job, _ = Native.create_job(mod.outer, (Int,)) JuliaContext() do ctx ir, meta = GPUCompiler.compile(:llvm, job) - meth = only(methods(outer, (Int,))) + meth = only(methods(mod.outer, (Int,))) mis = filter(mi->mi.def == meth, keys(meta.compiled)) @test length(mis) == 1 other_mis = filter(mi->mi.def != meth, 
keys(meta.compiled)) @test length(other_mis) == 1 - @test only(other_mis).def in methods(inner) + @test only(other_mis).def in methods(mod.inner) end end @testset "advanced database" begin - @noinline inner(x) = x+1 - foo(x) = sum(inner, fill(x, 10, 10)) + mod = @eval module $(gensym()) + @noinline inner(x) = x+1 + foo(x) = sum(inner, fill(x, 10, 10)) + end - job, _ = Native.create_job(foo, (Float64,); validate=false) + job, _ = Native.create_job(mod.foo, (Float64,); validate=false) JuliaContext() do ctx # shouldn't segfault ir, meta = GPUCompiler.compile(:llvm, job) - meth = only(methods(foo, (Float64,))) + meth = only(methods(mod.foo, (Float64,))) mis = filter(mi->mi.def == meth, keys(meta.compiled)) @test length(mis) == 1 inner_methods = filter(keys(meta.compiled)) do mi - mi.def in methods(inner) && mi.specTypes == Tuple{typeof(inner), Float64} + mi.def in methods(mod.inner) && + mi.specTypes == Tuple{typeof(mod.inner), Float64} end @test length(inner_methods) == 1 end end @testset "cached compilation" begin - @gensym child kernel unrelated - @eval @noinline $child(i) = i - @eval $kernel(i) = $child(i)+1 + mod = @eval module $(gensym()) + @noinline child(i) = i + kernel(i) = child(i)+1 + end # smoke test - job, _ = Native.create_job(eval(kernel), (Int64,)) - ir = sprint(io->GPUCompiler.code_llvm(io, job)) - @test contains(ir, r"add i64 %\d+, 1") + job, _ = Native.create_job(mod.kernel, (Int64,)) + @test @filecheck begin + check"CHECK-LABEL: define i64 @{{(julia|j)_kernel_[0-9]+}}" + check"CHECK: add i64 %{{[0-9]+}}, 1" + GPUCompiler.code_llvm(job) + end # basic redefinition - @eval $kernel(i) = $child(i)+2 - job, _ = Native.create_job(eval(kernel), (Int64,)) - ir = sprint(io->GPUCompiler.code_llvm(io, job)) - @test contains(ir, r"add i64 %\d+, 2") + @eval mod kernel(i) = child(i)+2 + job, _ = Native.create_job(mod.kernel, (Int64,)) + @test @filecheck begin + check"CHECK-LABEL: define i64 @{{(julia|j)_kernel_[0-9]+}}" + check"CHECK: add i64 %{{[0-9]+}}, 2" + 
GPUCompiler.code_llvm(job) + end # cached_compilation interface invocations = Ref(0) @@ -94,51 +114,65 @@ end end linker(job, compiled) = compiled cache = Dict() - ft = typeof(eval(kernel)) + ft = typeof(mod.kernel) tt = Tuple{Int64} # initial compilation source = methodinstance(ft, tt, Base.get_world_counter()) - ir = Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) - @test contains(ir, r"add i64 %\d+, 2") + @test @filecheck begin + check"CHECK-LABEL: define i64 @{{(julia|j)_kernel_[0-9]+}}" + check"CHECK: add i64 %{{[0-9]+}}, 2" + Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) + end @test invocations[] == 1 # cached compilation - ir = Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) - @test contains(ir, r"add i64 %\d+, 2") + @test @filecheck begin + check"CHECK-LABEL: define i64 @{{(julia|j)_kernel_[0-9]+}}" + check"CHECK: add i64 %{{[0-9]+}}, 2" + Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) + end @test invocations[] == 1 # redefinition - @eval $kernel(i) = $child(i)+3 + @eval mod kernel(i) = child(i)+3 source = methodinstance(ft, tt, Base.get_world_counter()) - ir = Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) - @test contains(ir, r"add i64 %\d+, 3") + @test @filecheck begin + check"CHECK-LABEL: define i64 @{{(julia|j)_kernel_[0-9]+}}" + check"CHECK: add i64 %{{[0-9]+}}, 3" + Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) + end @test invocations[] == 2 # cached compilation - ir = Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) - @test contains(ir, r"add i64 %\d+, 3") + @test @filecheck begin + check"CHECK-LABEL: define i64 @{{(julia|j)_kernel_[0-9]+}}" + check"CHECK: add i64 %{{[0-9]+}}, 3" + 
Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) + end @test invocations[] == 2 # redefinition of an unrelated function - @eval $unrelated(i) = 42 - ir = Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) + @eval mod unrelated(i) = 42 + Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) @test invocations[] == 2 # redefining child functions - @eval @noinline $child(i) = i+1 - ir = Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) + @eval mod @noinline child(i) = i+1 + Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) @test invocations[] == 3 # cached compilation - ir = Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) + Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) @test invocations[] == 3 # change in configuration config = CompilerConfig(job.config; name="foobar") - ir = Base.invokelatest(GPUCompiler.cached_compilation, cache, source, config, compiler, linker) + @test @filecheck begin + check"CHECK: define i64 @foobar" + Base.invokelatest(GPUCompiler.cached_compilation, cache, source, config, compiler, linker) + end @test invocations[] == 4 - @test contains(ir, "foobar") # tasks running in the background should keep on using the old version c1, c2 = Condition(), Condition() @@ -150,13 +184,16 @@ end end t = @async Base.invokelatest(background, job) wait(c1) # make sure the task has started - @eval $kernel(i) = $child(i)+4 + @eval mod kernel(i) = child(i)+4 source = methodinstance(ft, tt, Base.get_world_counter()) ir = Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker) @test contains(ir, r"add i64 %\d+, 4") notify(c2) # wake up the task - ir = fetch(t) - @test contains(ir, r"add i64 %\d+, 3") + @test @filecheck 
begin + check"CHECK-LABEL: define i64 @{{(julia|j)_kernel_[0-9]+}}" + check"CHECK: add i64 %{{[0-9]+}}, 3" + fetch(t) + end end @testset "allowed mutable types" begin @@ -177,59 +214,71 @@ end @testset "IR" begin @testset "basic reflection" begin - valid_kernel() = return - invalid_kernel() = 1 - - ir = sprint(io->Native.code_llvm(io, valid_kernel, Tuple{}; optimize=false, dump_module=true)) - - # module should contain our function + a generic call wrapper - @test occursin(r"define\ .* void\ @.*(julia|j)_valid_kernel.*\(\)"x, ir) - @test !occursin("define %jl_value_t* @jlcall_", ir) + mod = @eval module $(gensym()) + valid_kernel() = return + invalid_kernel() = 1 + end - # there should be no debug metadata - @test !occursin("!dbg", ir) + @test @filecheck begin + # module should contain our function + a generic call wrapper + check"CHECK: @{{(julia|j)_valid_kernel_[0-9]+}}" + Native.code_llvm(mod.valid_kernel, Tuple{}; optimize=false, dump_module=true) + end - @test Native.code_llvm(devnull, invalid_kernel, Tuple{}) == nothing - @test_throws KernelError Native.code_llvm(devnull, invalid_kernel, Tuple{}; kernel=true) == nothing + @test Native.code_llvm(devnull, mod.invalid_kernel, Tuple{}) == nothing + @test_throws KernelError Native.code_llvm(devnull, mod.invalid_kernel, Tuple{}; kernel=true) == nothing end @testset "unbound typevars" begin - invalid_kernel() where {unbound} = return - @test_throws KernelError Native.code_llvm(devnull, invalid_kernel, Tuple{}) + mod = @eval module $(gensym()) + invalid_kernel() where {unbound} = return + end + @test_throws KernelError Native.code_llvm(devnull, mod.invalid_kernel, Tuple{}) end @testset "child functions" begin # we often test using `@noinline sink` child functions, so test whether these survive - @noinline child(i) = sink(i) - parent(i) = child(i) + mod = @eval module $(gensym()) + import ..sink + @noinline child(i) = sink(i) + parent(i) = child(i) + end - ir = sprint(io->Native.code_llvm(io, parent, Tuple{Int})) - 
@test occursin(r"call .+ @(julia|j).+child.+", ir) + @test @filecheck begin + check"CHECK-LABEL: define i64 @{{(julia|j)_parent_[0-9]+}}" + check"CHECK: call{{.*}} i64 @{{(julia|j)_child_[0-9]+}}" + Native.code_llvm(mod.parent, Tuple{Int}) + end end @testset "sysimg" begin # bug: use a system image function - - function foobar(a,i) - Base.pointerset(a, 0, mod1(i,10), 8) + mod = @eval module $(gensym()) + function foobar(a,i) + Base.pointerset(a, 0, mod1(i,10), 8) + end end - ir = sprint(io->Native.code_llvm(io, foobar, Tuple{Ptr{Int},Int})) - @test !occursin("jlsys_", ir) + @test @filecheck begin + check"CHECK-NOT: jlsys_" + Native.code_llvm(mod.foobar, Tuple{Ptr{Int},Int}) + end end @testset "tracked pointers" begin - function kernel(a) - a[1] = 1 - return + mod = @eval module $(gensym()) + function kernel(a) + a[1] = 1 + return + end end # this used to throw an LLVM assertion (#223) - Native.code_llvm(devnull, kernel, Tuple{Vector{Int}}; kernel=true) + Native.code_llvm(devnull, mod.kernel, Tuple{Vector{Int}}; kernel=true) @test "We did not crash!" 
!= "" end -@testset "CUDAjl#278" begin +@testset "CUDA.jl#278" begin # codegen idempotency # NOTE: this isn't fixed, but surfaces here due to bad inference of checked_sub # NOTE: with the fix to print_to_string this doesn't error anymore, @@ -259,71 +308,94 @@ end end @testset "slow abi" begin - x = 2 - f = () -> x+1 - ir = sprint(io->Native.code_llvm(io, f, Tuple{}, entry_abi=:func, dump_module=true)) - @test occursin(r"define nonnull {}\* @jfptr", ir) || - occursin(r"define nonnull ptr @jfptr", ir) - @test occursin(r"define internal fastcc .+ @julia", ir) - @test occursin(r"call fastcc .+ @julia", ir) + mod = @eval module $(gensym()) + x = 2 + f = () -> x+1 + end + @test @filecheck begin + check"CHECK: define {{.+}} @julia" + check"TYPED: define nonnull {}* @jfptr" + check"OPAQUE: define nonnull ptr @jfptr" + check"CHECK: call {{.+}} @julia" + Native.code_llvm(mod.f, Tuple{}; entry_abi=:func, dump_module=true) + end end @testset "function entry safepoint emission" begin - ir = sprint(io->Native.code_llvm(io, identity, Tuple{Nothing}; entry_safepoint=false, optimize=false, dump_module=true)) - @test !occursin("%safepoint", ir) + @test @filecheck begin + check"CHECK-LABEL: define void @{{(julia|j)_identity_[0-9]+}}" + check"CHECK-NOT: %safepoint" + Native.code_llvm(identity, Tuple{Nothing}; entry_safepoint=false, optimize=false, dump_module=true) + end - ir = sprint(io->Native.code_llvm(io, identity, Tuple{Nothing}; entry_safepoint=true, optimize=false, dump_module=true)) - @test occursin("%safepoint", ir) broken=(VERSION >= v"1.13.0-DEV.533") # XXX: broken by JuliaLang/julia#57010, # see https://github.com/JuliaLang/julia/pull/57010/files#r2079576894 + if VERSION < v"1.13.0-DEV.533" + @test @filecheck begin + check"CHECK-LABEL: define void @{{(julia|j)_identity_[0-9]+}}" + check"CHECK: %safepoint" + Native.code_llvm(identity, Tuple{Nothing}; entry_safepoint=true, optimize=false, dump_module=true) + end + end end @testset "always_inline" begin # XXX: broken by 
JuliaLang/julia#51599, see JuliaGPU/GPUCompiler.jl#527 mod = @eval module $(gensym()) - f_expensive(x) = $(foldl((e, _) -> :($sink($e) + $sink(x)), 1:100; init=:x)) + import ..sink + expensive(x) = $(foldl((e, _) -> :($sink($e) + $sink(x)), 1:100; init=:x)) function g(x) - f_expensive(x) + expensive(x) return end function h(x) - f_expensive(x) + expensive(x) return end end - ir = sprint(io->Native.code_llvm(io, mod.g, Tuple{Int64}; dump_module=true, kernel=true)) - @test occursin(r"^define.*(julia|j)_f_expensive"m, ir) + @test @filecheck begin + check"CHECK: @{{(julia|j)_expensive_[0-9]+}}" + Native.code_llvm(mod.g, Tuple{Int64}; dump_module=true, kernel=true) + end - ir = sprint(io->Native.code_llvm(io, mod.g, Tuple{Int64}; dump_module=true, kernel=true, - always_inline=true)) - @test !occursin(r"^define.*(julia|j)_f_expensive"m, ir) + @test @filecheck begin + check"CHECK-NOT: @{{(julia|j)_expensive_[0-9]+}}" + Native.code_llvm(mod.g, Tuple{Int64}; dump_module=true, kernel=true, always_inline=true) + end - ir = sprint(io->Native.code_llvm(io, mod.h, Tuple{Int64}; dump_module=true, kernel=true, - always_inline=true)) - @test !occursin(r"^define.*(julia|j)_f_expensive"m, ir) + @test @filecheck begin + check"CHECK: @{{(julia|j)_expensive_[0-9]+}}" + Native.code_llvm(mod.h, Tuple{Int64}; dump_module=true, kernel=true) + end - ir = sprint(io->Native.code_llvm(io, mod.h, Tuple{Int64}; dump_module=true, kernel=true)) - @test occursin(r"^define.*(julia|j)_f_expensive"m, ir) + @test @filecheck begin + check"CHECK-NOT: @{{(julia|j)_expensive_[0-9]+}}" + Native.code_llvm(mod.h, Tuple{Int64}; dump_module=true, kernel=true, always_inline=true) + end end @testset "function attributes" begin - @inline function convergent_barrier() - Base.llvmcall((""" - declare void @barrier() #1 - - define void @entry() #0 { - call void @barrier() - ret void - } - - attributes #0 = { alwaysinline } - attributes #1 = { convergent }""", "entry"), - Nothing, Tuple{}) + mod = @eval module 
$(gensym()) + @inline function convergent_barrier() + Base.llvmcall((""" + declare void @barrier() #1 + + define void @entry() #0 { + call void @barrier() + ret void + } + + attributes #0 = { alwaysinline } + attributes #1 = { convergent }""", "entry"), + Nothing, Tuple{}) + end end - ir = sprint(io->Native.code_llvm(io, convergent_barrier, Tuple{}; dump_module=true, raw=true)) - @test occursin(r"attributes #. = \{ convergent \}", ir) + @test @filecheck begin + check"CHECK: attributes #{{.}} = { convergent }" + Native.code_llvm(mod.convergent_barrier, Tuple{}; dump_module=true, raw=true) + end end end @@ -333,39 +405,45 @@ end @testset "assembly" begin @testset "basic reflection" begin - valid_kernel() = return - invalid_kernel() = 1 + mod = @eval module $(gensym()) + valid_kernel() = return + invalid_kernel() = 1 + end - @test Native.code_native(devnull, valid_kernel, Tuple{}) == nothing - @test Native.code_native(devnull, invalid_kernel, Tuple{}) == nothing - @test_throws KernelError Native.code_native(devnull, invalid_kernel, Tuple{}; kernel=true) + @test Native.code_native(devnull, mod.valid_kernel, Tuple{}) == nothing + @test Native.code_native(devnull, mod.invalid_kernel, Tuple{}) == nothing + @test_throws KernelError Native.code_native(devnull, mod.invalid_kernel, Tuple{}; kernel=true) end @testset "idempotency" begin # bug: generate code twice for the same kernel (jl_to_ptx wasn't idempotent) - - kernel() = return - Native.code_native(devnull, kernel, Tuple{}) - Native.code_native(devnull, kernel, Tuple{}) + mod = @eval module $(gensym()) + kernel() = return + end + Native.code_native(devnull, mod.kernel, Tuple{}) + Native.code_native(devnull, mod.kernel, Tuple{}) @test "We did not crash!" 
!= "" end @testset "compile for host after gpu" begin # issue #11: re-using host functions after GPU compilation - @noinline child(i) = sink(i+1) + mod = @eval module $(gensym()) + import ..sink + @noinline child(i) = sink(i+1) - function fromhost() - child(10) - end + function fromhost() + child(10) + end - function fromptx() - child(10) - return + function fromptx() + child(10) + return + end end - Native.code_native(devnull, fromptx, Tuple{}) - @test fromhost() == 11 + Native.code_native(devnull, mod.fromptx, Tuple{}) + @test mod.fromhost() == 11 end end @@ -374,23 +452,29 @@ end @testset "errors" begin -struct CleverType{T} - x::T -end -Base.unsafe_trunc(::Type{Int}, x::CleverType) = unsafe_trunc(Int, x.x) @testset "non-isbits arguments" begin - foobar(i) = (sink(unsafe_trunc(Int,i)); return) + mod = @eval module $(gensym()) + import ..sink + foobar(i) = (sink(unsafe_trunc(Int,i)); return) + end @test_throws_message(KernelError, - Native.code_execution(foobar, Tuple{BigInt})) do msg + Native.code_execution(mod.foobar, Tuple{BigInt})) do msg occursin("passing non-bitstype argument", msg) && occursin("BigInt", msg) end # test that we get information about fields and reason why something is not isbits + mod = @eval module $(gensym()) + struct CleverType{T} + x::T + end + Base.unsafe_trunc(::Type{Int}, x::CleverType) = unsafe_trunc(Int, x.x) + foobar(i) = (sink(unsafe_trunc(Int,i)); return) + end @test_throws_message(KernelError, - Native.code_execution(foobar, Tuple{CleverType{BigInt}})) do msg + Native.code_execution(mod.foobar, Tuple{mod.CleverType{BigInt}})) do msg occursin("passing non-bitstype argument", msg) && occursin("CleverType", msg) && occursin("BigInt", msg) @@ -399,7 +483,6 @@ end @testset "invalid LLVM IR" begin mod = @eval module $(gensym()) - export foobar foobar(i) = println(i) end @@ -416,7 +499,6 @@ end @testset "invalid LLVM IR (ccall)" begin mod = @eval module $(gensym()) - export foobar function foobar(p) unsafe_store!(p, ccall(:time, Cint, 
())) return @@ -440,7 +522,6 @@ end @testset "delayed bindings" begin mod = @eval module $(gensym()) - export kernel function kernel() undefined return @@ -473,7 +554,6 @@ end @testset "dynamic call (apply)" begin mod = @eval module $(gensym()) - export func func() = println(1) end @@ -495,11 +575,14 @@ end mod = @eval module $(gensym()) kernel() = child() - child() = 0 + @inline child() = 0 end - ir = sprint(io->Native.code_llvm(io, mod.kernel, Tuple{})) - @test occursin("ret i64 0", ir) + @test @filecheck begin + check"CHECK-LABEL: @julia_kernel" + check"CHECK: ret i64 0" + Native.code_llvm(mod.kernel, Tuple{}) + end mod = @eval module $(gensym()) using ..GPUCompiler @@ -507,16 +590,20 @@ end Base.Experimental.@MethodTable(method_table) kernel() = child() - child() = 0 + @inline child() = 0 Base.Experimental.@overlay method_table child() = 1 end - ir = sprint(io->Native.code_llvm(io, mod.kernel, Tuple{}; mod.method_table)) - @test occursin("ret i64 1", ir) + @test @filecheck begin + check"CHECK-LABEL: @julia_kernel" + check"CHECK: ret i64 1" + Native.code_llvm(mod.kernel, Tuple{}; mod.method_table) + end end -@testset "#366: semi-concrete interpretation + overlay methods = dynamic dispatch" begin +@testset "semi-concrete interpretation + overlay methods" begin + # issue 366, caused dynamic dispatch mod = @eval module $(gensym()) using ..GPUCompiler using StaticArrays @@ -533,14 +620,17 @@ end (ccall("extern __nv_isnanf", llvmcall, Int32, (Cfloat,), x)) != 0 end - ir = sprint(io->Native.code_llvm(io, mod.kernel, Tuple{Int, Int}; - debuginfo=:none, mod.method_table)) - @test !occursin("apply_generic", ir) - @test occursin("llvm.floor", ir) + @test @filecheck begin + check"CHECK-LABEL: @julia_kernel" + check"CHECK-NOT: apply_generic" + check"CHECK: llvm.floor" + Native.code_llvm(mod.kernel, Tuple{Int, Int}; debuginfo=:none, mod.method_table) + end end -@testset "JuliaLang/julia#48097: kwcall inference in the presence of overlay method" begin - # XXX: broken again by 
by JuliaLang/julia#51092, see JuliaGPU/GPUCompiler.jl#506 +@testset "kwcall inference + overlay method" begin + # originally broken by JuliaLang/julia#48097 + # broken again by JuliaLang/julia#51092, see JuliaGPU/GPUCompiler.jl#506 mod = @eval module $(gensym()) child(; kwargs...) = return @@ -553,11 +643,13 @@ end Base.Experimental.@overlay method_table @noinline Core.throw_inexacterror(f::Symbol, ::Type{T}, val) where {T} = return end - ir = sprint(io->Native.code_llvm(io, mod.parent, Tuple{}; - debuginfo=:none, mod.method_table)) - - @test occursin("ret void", ir) - @test !any(f->occursin(f, ir), - ["jl_invoke", "apply_iterate", - "inttoptr", "apply_type"]) + @test @filecheck begin + check"CHECK-LABEL: @julia_parent" + check"CHECK-NOT: jl_invoke" + check"CHECK-NOT: apply_iterate" + check"CHECK-NOT: inttoptr" + check"CHECK-NOT: apply_type" + check"CHECK: ret void" + Native.code_llvm(mod.parent, Tuple{}; debuginfo=:none, mod.method_table) + end end diff --git a/test/ptx.jl b/test/ptx.jl index 5b2cfd8a..18dd0e4a 100644 --- a/test/ptx.jl +++ b/test/ptx.jl @@ -1,13 +1,18 @@ @testset "IR" begin @testset "exceptions" begin - foobar() = throw(DivideError()) - ir = sprint(io->PTX.code_llvm(io, foobar, Tuple{})) - - # plain exceptions should get lowered to a call to the GPU run-time - @test occursin("gpu_report_exception", ir) - # not a jl_throw referencing a jl_value_t representing the exception - @test !occursin("jl_throw", ir) + mod = @eval module $(gensym()) + foobar() = throw(DivideError()) + end + @test @filecheck begin + check"CHECK-LABEL: define void @{{(julia|j)_foobar_[0-9]+}}" + # plain exceptions should get lowered to a call to the GPU run-time + # not a jl_throw referencing a jl_value_t representing the exception + check"CHECK-NOT: jl_throw" + check"CHECK: gpu_report_exception" + + PTX.code_llvm(mod.foobar, Tuple{}; dump_module=true) + end end @testset "kernel functions" begin @@ -20,73 +25,97 @@ end end end - ir = sprint(io->PTX.code_llvm(io, mod.kernel, 
Tuple{mod.Aggregate})) - @test occursin(r"@(julia|j)_kernel\w*\(({ i64 }|\[1 x i64\])\* ", ir) || - occursin(r"@(julia|j)_kernel\w*\(ptr ", ir) + @test @filecheck begin + check"CHECK-LABEL: define void @{{(julia|j)_kernel_[0-9]+}}" + check"TYPED-SAME: ({{({ i64 }|\[1 x i64\])}}*" + check"OPAQUE-SAME: (ptr" + PTX.code_llvm(mod.kernel, Tuple{mod.Aggregate}) + end - ir = sprint(io->PTX.code_llvm(io, mod.kernel, Tuple{mod.Aggregate}; kernel=true)) - @test occursin(r"@_Z6kernel9Aggregate\(.*({ i64 }|\[1 x i64\]) ", ir) + @test @filecheck begin + check"CHECK-LABEL: define ptx_kernel void @_Z6kernel9Aggregate" + check"TYPED-NOT: *" + check"OPAQUE-NOT: ptr" + PTX.code_llvm(mod.kernel, Tuple{mod.Aggregate}; kernel=true) + end end @testset "property_annotations" begin - kernel() = return - - ir = sprint(io->PTX.code_llvm(io, kernel, Tuple{}; dump_module=true)) - @test !occursin("nvvm.annotations", ir) - - ir = sprint(io->PTX.code_llvm(io, kernel, Tuple{}; - dump_module=true, kernel=true)) - @test occursin("nvvm.annotations", ir) - @test !occursin("maxntid", ir) - @test !occursin("reqntid", ir) - @test !occursin("minctasm", ir) - @test !occursin("maxnreg", ir) - - ir = sprint(io->PTX.code_llvm(io, kernel, Tuple{}; - dump_module=true, kernel=true, maxthreads=42)) - @test occursin("maxntidx\", i32 42", ir) - @test occursin("maxntidy\", i32 1", ir) - @test occursin("maxntidz\", i32 1", ir) - - ir = sprint(io->PTX.code_llvm(io, kernel, Tuple{}; - dump_module=true, kernel=true, minthreads=42)) - @test occursin("reqntidx\", i32 42", ir) - @test occursin("reqntidy\", i32 1", ir) - @test occursin("reqntidz\", i32 1", ir) - - ir = sprint(io->PTX.code_llvm(io, kernel, Tuple{}; - dump_module=true, kernel=true, blocks_per_sm=42)) - @test occursin("minctasm\", i32 42", ir) - - ir = sprint(io->PTX.code_llvm(io, kernel, Tuple{}; - dump_module=true, kernel=true, maxregs=42)) - @test occursin("maxnreg\", i32 42", ir) + mod = @eval module $(gensym()) + kernel() = return + end + + @test 
@filecheck begin + check"CHECK-NOT: nvvm.annotations" + PTX.code_llvm(mod.kernel, Tuple{}; dump_module=true) + end + + @test @filecheck begin + check"CHECK-NOT: maxntid" + check"CHECK-NOT: reqntid" + check"CHECK-NOT: minctasm" + check"CHECK-NOT: maxnreg" + check"CHECK: nvvm.annotations" + PTX.code_llvm(mod.kernel, Tuple{}; dump_module=true, kernel=true) + end + + @test @filecheck begin + check"CHECK: maxntidx\", i32 42" + check"CHECK: maxntidy\", i32 1" + check"CHECK: maxntidz\", i32 1" + PTX.code_llvm(mod.kernel, Tuple{}; dump_module=true, kernel=true, maxthreads=42) + end + + @test @filecheck begin + check"CHECK: reqntidx\", i32 42" + check"CHECK: reqntidy\", i32 1" + check"CHECK: reqntidz\", i32 1" + PTX.code_llvm(mod.kernel, Tuple{}; dump_module=true, kernel=true, minthreads=42) + end + + @test @filecheck begin + check"CHECK: minctasm\", i32 42" + PTX.code_llvm(mod.kernel, Tuple{}; dump_module=true, kernel=true, blocks_per_sm=42) + end + + @test @filecheck begin + check"CHECK: maxnreg\", i32 42" + PTX.code_llvm(mod.kernel, Tuple{}; dump_module=true, kernel=true, maxregs=42) + end end LLVM.version() >= v"8" && @testset "calling convention" begin - kernel() = return + mod = @eval module $(gensym()) + kernel() = return + end - ir = sprint(io->PTX.code_llvm(io, kernel, Tuple{}; dump_module=true)) - @test !occursin("ptx_kernel", ir) + @test @filecheck begin + check"CHECK-NOT: ptx_kernel" + PTX.code_llvm(mod.kernel, Tuple{}; dump_module=true) + end - ir = sprint(io->PTX.code_llvm(io, kernel, Tuple{}; - dump_module=true, kernel=true)) - @test occursin("ptx_kernel", ir) + @test @filecheck begin + check"CHECK: ptx_kernel" + PTX.code_llvm(mod.kernel, Tuple{}; dump_module=true, kernel=true) + end end @testset "kernel state" begin # state should be passed by value to kernel functions mod = @eval module $(gensym()) - export kernel kernel() = return end - ir = sprint(io->PTX.code_llvm(io, mod.kernel, Tuple{})) - @test occursin(r"@(julia|j)_kernel\w*\(\)", ir) + @test 
@filecheck begin + check"CHECK: @{{(julia|j)_kernel[0-9_]*}}()" + PTX.code_llvm(mod.kernel, Tuple{}) + end - ir = sprint(io->PTX.code_llvm(io, mod.kernel, Tuple{}; kernel=true)) - @test occursin("@_Z6kernel([1 x i64] %state)", ir) + @test @filecheck begin + check"CHECK: @_Z6kernel([1 x i64] %state)" + PTX.code_llvm(mod.kernel, Tuple{}; kernel=true) + end # state should only passed to device functions that use it @@ -104,20 +133,22 @@ end end end - ir = sprint(io->PTX.code_llvm(io, mod.kernel, Tuple{Ptr{Int64}}; - kernel=true, dump_module=true)) - # kernel should take state argument before all else - @test occursin(r"@_Z6kernelP5Int64\(\[1 x i64\] %state", ir) - + @test @filecheck begin + check"CHECK-LABEL: define ptx_kernel void @_Z6kernelP5Int64([1 x i64] %state" + check"CHECK-NOT: julia.gpu.state_getter" + PTX.code_llvm(mod.kernel, Tuple{Ptr{Int64}}; kernel=true, dump_module=true) + end # child1 doesn't use the state - @test occursin(r"@(julia|j)_child1\w*\((i64|i8\*|ptr)", ir) - + @test @filecheck begin + check"CHECK-LABEL: define{{.*}} i64 @{{(julia|j)_child1_[0-9]+}}" + PTX.code_llvm(mod.kernel, Tuple{Ptr{Int64}}; kernel=true, dump_module=true) + end # child2 does - @test occursin(r"@(julia|j)_child2\w*\(\[1 x i64\] %state", ir) - - # can't have the unlowered intrinsic - @test !occursin("julia.gpu.state_getter", ir) + @test @filecheck begin + check"CHECK-LABEL: define{{.*}} i64 @{{(julia|j)_child2_[0-9]+}}" + PTX.code_llvm(mod.kernel, Tuple{Ptr{Int64}}; kernel=true, dump_module=true) + end end end @@ -133,8 +164,6 @@ if :NVPTX in LLVM.backends() mod = @eval module $(gensym()) import ..sink - export child, parent - @noinline child(i) = sink(i) function parent(i) child(i) @@ -142,15 +171,17 @@ if :NVPTX in LLVM.backends() end end - asm = sprint(io->PTX.code_native(io, mod.parent, Tuple{Int64})) - @test occursin(r"call.uni\s+(julia|j)_child_"m, asm) + @test @filecheck begin + check"CHECK-LABEL: .visible .func {{(julia|j)_parent[0-9_]*}}" + check"CHECK: call.uni" 
+ check"CHECK-NEXT: {{(julia|j)_child_}}" + PTX.code_native(mod.parent, Tuple{Int64}) + end end @testset "kernel functions" begin mod = @eval module $(gensym()) import ..sink - export nonentry, entry - @noinline nonentry(i) = sink(i) function entry(i) nonentry(i) @@ -158,32 +189,39 @@ end end end - asm = sprint(io->PTX.code_native(io, mod.entry, Tuple{Int64}; - kernel=true, dump_module=true)) - @test occursin(".visible .entry _Z5entry5Int64", asm) - @test !occursin(r"\.visible \.func (julia|j)_nonentry", asm) - @test occursin(r"\.func (julia|j)_nonentry", asm) + @test @filecheck begin + check"CHECK-NOT: .visible .func {{(julia|j)_nonentry}}" + check"CHECK-LABEL: .visible .entry _Z5entry5Int64" + check"CHECK: {{(julia|j)_nonentry}}" + PTX.code_native(mod.entry, Tuple{Int64}; kernel=true, dump_module=true) + end @testset "property_annotations" begin - asm = sprint(io->PTX.code_native(io, mod.entry, Tuple{Int64}; kernel=true)) - @test !occursin("maxntid", asm) + @test @filecheck begin + check"CHECK-NOT: maxntid" + PTX.code_native(mod.entry, Tuple{Int64}; kernel=true) + end - asm = sprint(io->PTX.code_native(io, mod.entry, Tuple{Int64}; - kernel=true, maxthreads=42)) - @test occursin(".maxntid 42, 1, 1", asm) + @test @filecheck begin + check"CHECK: .maxntid 42, 1, 1" + PTX.code_native(mod.entry, Tuple{Int64}; kernel=true, maxthreads=42) + end - asm = sprint(io->PTX.code_native(io, mod.entry, Tuple{Int64}; - kernel=true, minthreads=42)) - @test occursin(".reqntid 42, 1, 1", asm) + @test @filecheck begin + check"CHECK: .reqntid 42, 1, 1" + PTX.code_native(mod.entry, Tuple{Int64}; kernel=true, minthreads=42) + end - asm = sprint(io->PTX.code_native(io, mod.entry, Tuple{Int64}; - kernel=true, blocks_per_sm=42)) - @test occursin(".minnctapersm 42", asm) + @test @filecheck begin + check"CHECK: .minnctapersm 42" + PTX.code_native(mod.entry, Tuple{Int64}; kernel=true, blocks_per_sm=42) + end if LLVM.version() >= v"4.0" - asm = sprint(io->PTX.code_native(io, mod.entry, 
Tuple{Int64}; - kernel=true, maxregs=42)) - @test occursin(".maxnreg 42", asm) + @test @filecheck begin + check"CHECK: .maxnreg 42" + PTX.code_native(mod.entry, Tuple{Int64}; kernel=true, maxregs=42) + end end end end @@ -194,8 +232,6 @@ end mod = @eval module $(gensym()) import ..sink - export child, parent1, parent2 - @noinline child(i) = sink(i) function parent1(i) child(i) @@ -207,11 +243,15 @@ end end end - asm = sprint(io->PTX.code_native(io, mod.parent1, Tuple{Int})) - @test occursin(r"\.func (julia|j)_child_", asm) + @test @filecheck begin + check"CHECK: .func {{(julia|j)_child}}" + PTX.code_native(mod.parent1, Tuple{Int}) + end - asm = sprint(io->PTX.code_native(io, mod.parent2, Tuple{Int})) - @test occursin(r"\.func (julia|j)_child_", asm) + @test @filecheck begin + check"CHECK: .func {{(julia|j)_child}}" + PTX.code_native(mod.parent2, Tuple{Int}) + end end @testset "child function reuse bis" begin @@ -220,8 +260,6 @@ end mod = @eval module $(gensym()) import ..sink - export parent1, parent2, child1, child2 - @noinline child1(i) = sink(i) @noinline child2(i) = sink(i+1) function parent1(i) @@ -234,13 +272,17 @@ end end end - asm = sprint(io->PTX.code_native(io, mod.parent1, Tuple{Int})) - @test occursin(r"\.func (julia|j)_child1_", asm) - @test occursin(r"\.func (julia|j)_child2_", asm) + @test @filecheck begin + check"CHECK-DAG: .func {{(julia|j)_child1}}" + check"CHECK-DAG: .func {{(julia|j)_child2}}" + PTX.code_native(mod.parent1, Tuple{Int}) + end - asm = sprint(io->PTX.code_native(io, mod.parent2, Tuple{Int})) - @test occursin(r"\.func (julia|j)_child1_", asm) - @test occursin(r"\.func (julia|j)_child2_", asm) + @test @filecheck begin + check"CHECK-DAG: .func {{(julia|j)_child1}}" + check"CHECK-DAG: .func {{(julia|j)_child2}}" + PTX.code_native(mod.parent2, Tuple{Int}) + end end @testset "indirect sysimg function use" begin @@ -248,34 +290,42 @@ end # (host fldmod1->mod1 throws, so the PTX code shouldn't contain a throw) # NOTE: Int32 to test for #49 
- - function kernel(out) - wid, lane = fldmod1(unsafe_load(out), Int32(32)) - unsafe_store!(out, wid) - return + mod = @eval module $(gensym()) + function kernel(out) + wid, lane = fldmod1(unsafe_load(out), Int32(32)) + unsafe_store!(out, wid) + return + end end - asm = sprint(io->PTX.code_native(io, kernel, Tuple{Ptr{Int32}})) - @test !occursin("jl_throw", asm) - @test !occursin("jl_invoke", asm) # forced recompilation should still not invoke + @test @filecheck begin + check"CHECK-LABEL: .visible .func {{(julia|j)_kernel[0-9_]*}}" + check"CHECK-NOT: jl_throw" + check"CHECK-NOT: jl_invoke" + PTX.code_native(mod.kernel, Tuple{Ptr{Int32}}) + end end @testset "LLVM intrinsics" begin # issue #13 (a): cannot select trunc - function kernel(x) - unsafe_trunc(Int, x) - return + mod = @eval module $(gensym()) + function kernel(x) + unsafe_trunc(Int, x) + return + end end - PTX.code_native(devnull, kernel, Tuple{Float64}) + PTX.code_native(devnull, mod.kernel, Tuple{Float64}) @test "We did not crash!" != "" end @testset "exception arguments" begin - function kernel(a) - unsafe_store!(a, trunc(Int, unsafe_load(a))) - return + mod = @eval module $(gensym()) + function kernel(a) + unsafe_store!(a, trunc(Int, unsafe_load(a))) + return + end end - PTX.code_native(devnull, kernel, Tuple{Ptr{Float64}}) + PTX.code_native(devnull, mod.kernel, Tuple{Ptr{Float64}}) @test "We did not crash!" 
!= "" end @@ -299,47 +349,57 @@ end end end - asm = sprint(io->PTX.code_native(io, mod.kernel, Tuple{Int})) - @test occursin("gpu_gc_pool_alloc", asm) - @test !occursin("julia.push_gc_frame", asm) - @test !occursin("julia.pop_gc_frame", asm) - @test !occursin("julia.get_gc_frame_slot", asm) - @test !occursin("julia.new_gc_frame", asm) - + @test @filecheck begin + check"CHECK-LABEL: .visible .func {{(julia|j)_kernel[0-9_]*}}" + check"CHECK-NOT: julia.push_gc_frame" + check"CHECK-NOT: julia.pop_gc_frame" + check"CHECK-NOT: julia.get_gc_frame_slot" + check"CHECK-NOT: julia.new_gc_frame" + check"CHECK: gpu_gc_pool_alloc" + PTX.code_native(mod.kernel, Tuple{Int}) + end # make sure that we can still ellide allocations - function ref_kernel(ptr, i) - data = Ref{Int64}() - data[] = 0 - if i > 1 - data[] = 1 - else - data[] = 2 + mod = @eval module $(gensym()) + function ref_kernel(ptr, i) + data = Ref{Int64}() + data[] = 0 + if i > 1 + data[] = 1 + else + data[] = 2 + end + unsafe_store!(ptr, data[], i) + return nothing end - unsafe_store!(ptr, data[], i) - return nothing end - asm = sprint(io->PTX.code_native(io, ref_kernel, Tuple{Ptr{Int64}, Int})) - - - @test !occursin("gpu_gc_pool_alloc", asm) + @test @filecheck begin + check"CHECK-LABEL: .visible .func {{(julia|j)_ref_kernel[0-9_]*}}" + check"CHECK-NOT: gpu_gc_pool_alloc" + PTX.code_native(mod.ref_kernel, Tuple{Ptr{Int64}, Int}) + end end @testset "float boxes" begin - function kernel(a,b) - c = Int32(a) - # the conversion to Int32 may fail, in which case the input Float32 is boxed in order to - # pass it to the @nospecialize exception constructor. we should really avoid that (eg. - # by avoiding @nospecialize, or optimize the unused arguments away), but for now the box - # should just work. 
- unsafe_store!(b, c) - return - end - - ir = sprint(io->PTX.code_llvm(io, kernel, Tuple{Float32,Ptr{Float32}})) - @test occursin("jl_box_float32", ir) - PTX.code_native(devnull, kernel, Tuple{Float32,Ptr{Float32}}) + mod = @eval module $(gensym()) + function kernel(a,b) + c = Int32(a) + # the conversion to Int32 may fail, in which case the input Float32 is boxed in + # order to pass it to the @nospecialize exception constructor. we should really + # avoid that (eg. by avoiding @nospecialize, or optimize the unused arguments + # away), but for now the box should just work. + unsafe_store!(b, c) + return + end + end + + @test @filecheck begin + check"CHECK-LABEL: define void @{{(julia|j)_kernel_[0-9]+}}" + check"CHECK: jl_box_float32" + PTX.code_llvm(mod.kernel, Tuple{Float32,Ptr{Float32}}) + end + PTX.code_native(devnull, mod.kernel, Tuple{Float32,Ptr{Float32}}) end end diff --git a/test/setup.jl b/test/setup.jl index 07adf625..5744a675 100644 --- a/test/setup.jl +++ b/test/setup.jl @@ -9,6 +9,7 @@ for file in readdir(joinpath(@__DIR__, "helpers")) include(joinpath(@__DIR__, "helpers", file)) end end +using .FileCheck ## entry point diff --git a/test/spirv.jl b/test/spirv.jl index e14ccf77..be7f2651 100644 --- a/test/spirv.jl +++ b/test/spirv.jl @@ -4,60 +4,74 @@ for backend in (:khronos, :llvm) @testset "kernel functions" begin @testset "calling convention" begin - kernel() = return + mod = @eval module $(gensym()) + kernel() = return + end - ir = sprint(io->SPIRV.code_llvm(io, kernel, Tuple{}; backend, dump_module=true)) - @test !occursin("spir_kernel", ir) + @test @filecheck begin + check"CHECK-NOT: spir_kernel" + SPIRV.code_llvm(mod.kernel, Tuple{}; backend, dump_module=true) + end - ir = sprint(io->SPIRV.code_llvm(io, kernel, Tuple{}; - backend, dump_module=true, kernel=true)) - @test occursin("spir_kernel", ir) + @test @filecheck begin + check"CHECK: spir_kernel" + SPIRV.code_llvm(mod.kernel, Tuple{}; backend, dump_module=true, kernel=true) + end end @testset 
"byval workaround" begin mod = @eval module $(gensym()) - export kernel kernel(x) = return end - ir = sprint(io->SPIRV.code_llvm(io, mod.kernel, Tuple{Tuple{Int}}; backend)) - @test occursin(r"@\w*kernel\w*\(({ i64 }|\[1 x i64\])\*", ir) || - occursin(r"@\w*kernel\w*\(ptr", ir) + @test @filecheck begin + check"CHECK-LABEL: define void @{{(julia|j)_kernel_[0-9]+}}" + SPIRV.code_llvm(mod.kernel, Tuple{Tuple{Int}}; backend) + end - ir = sprint(io->SPIRV.code_llvm(io, mod.kernel, Tuple{Tuple{Int}}; - backend, kernel=true)) - @test occursin(r"@\w*kernel\w*\(.*{ ({ i64 }|\[1 x i64\]) }\*.+byval", ir) || - occursin(r"@\w*kernel\w*\(ptr byval", ir) + @test @filecheck begin + check"CHECK-LABEL: define spir_kernel void @_Z6kernel" + SPIRV.code_llvm(mod.kernel, Tuple{Tuple{Int}}; backend, kernel=true) + end end @testset "byval bug" begin # byval added alwaysinline, which could conflict with noinline and fail verification - @noinline kernel() = return - SPIRV.code_llvm(devnull, kernel, Tuple{}; backend, kernel=true) - @test "We did not crash!" 
!= "" + mod = @eval module $(gensym()) + @noinline kernel() = return + end + @test @filecheck begin + check"CHECK-LABEL: define spir_kernel void @_Z6kernel" + SPIRV.code_llvm(mod.kernel, Tuple{}; backend, kernel=true) + end end end @testset "unsupported type detection" begin mod = @eval module $(gensym()) - export kernel function kernel(ptr, val) unsafe_store!(ptr, val) return end end - ir = sprint(io->SPIRV.code_llvm(io, mod.kernel, Tuple{Ptr{Float16}, Float16}; - backend)) - @test occursin("store half", ir) + @test @filecheck begin + check"CHECK-LABEL: define void @{{(julia|j)_kernel_[0-9]+}}" + check"CHECK: store half" + SPIRV.code_llvm(mod.kernel, Tuple{Ptr{Float16}, Float16}; backend) + end - ir = sprint(io->SPIRV.code_llvm(io, mod.kernel, Tuple{Ptr{Float32}, Float32}; - backend)) - @test occursin("store float", ir) + @test @filecheck begin + check"CHECK-LABEL: define void @{{(julia|j)_kernel_[0-9]+}}" + check"CHECK: store float" + SPIRV.code_llvm(mod.kernel, Tuple{Ptr{Float32}, Float32}; backend) + end - ir = sprint(io->SPIRV.code_llvm(io, mod.kernel, Tuple{Ptr{Float64}, Float64}; - backend)) - @test occursin("store double", ir) + @test @filecheck begin + check"CHECK-LABEL: define void @{{(julia|j)_kernel_[0-9]+}}" + check"CHECK: store double" + SPIRV.code_llvm(mod.kernel, Tuple{Ptr{Float64}, Float64}; backend) + end @test_throws_message(InvalidIRError, SPIRV.code_execution(mod.kernel, Tuple{Ptr{Float16}, Float16}; @@ -83,13 +97,17 @@ end @testset "asm" begin @testset "trap removal" begin - function kernel(x) - x && error() - return + mod = @eval module $(gensym()) + function kernel(x) + x && error() + return + end end - asm = sprint(io->SPIRV.code_native(io, kernel, Tuple{Bool}; backend, kernel=true)) - @test occursin(r"OpFunctionCall %void %(julia|j)_error", asm) + @test @filecheck begin + check"CHECK: %_Z6kernel4Bool = OpFunction %void None" + SPIRV.code_native(mod.kernel, Tuple{Bool}; backend, kernel=true) + end end end diff --git a/test/utils.jl 
b/test/utils.jl index f0de138d..6d17ab73 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -164,3 +164,26 @@ next_world = Base.get_world_counter() @test o_sin.overlayed == true end end + +# Test FileCheck +@testset "FileCheck" begin + @test @filecheck begin + check"CHECK: works" + println("works") + end + + @test_throws "expected string not found in input" @filecheck begin + check"CHECK: works" + println("doesn't work") + end + + @test @filecheck begin + check"CHECK: errors" + error("errors") + end + + @test_throws "expected string not found in input" @filecheck begin + check"CHECK: works" + error("errors") + end +end