diff --git a/src/atomic_legalization.jl b/src/atomic_legalization.jl
new file mode 100644
index 00000000..49f89a57
--- /dev/null
+++ b/src/atomic_legalization.jl
@@ -0,0 +1,65 @@
+"""
+    smallest_atomic_size(job)
+
+Return the size, in bytes, of the smallest atomic access that is left as-is; narrower
+atomic loads are widened to this size by `legalize_atomics!`.
+
+Currently hard-coded to 4 for every `job`.
+"""
+function smallest_atomic_size(job)
+    return 4
+end
+
+# Planned legalization steps (so far only step 1 is started, and only for loads):
+# 1. Legalize sizes
+# 2. Legalize ordering through fences
+# 3. Legalize operations through cmpswp
+
+"""
+    legalize_atomics!(job, ir)
+
+Widen the atomic loads in the LLVM module `ir` that are narrower than
+`smallest_atomic_size(job)` bytes, and return `ir`.
+
+Each such integer load is replaced by an atomic load of `smallest_atomic_size(job)` bytes
+from the same pointer, truncated back to the original type. Atomic stores and atomic
+loads of non-integer types are left untouched for now.
+"""
+function legalize_atomics!(job, ir)
+    dl = datalayout(ir)
+    min_bytes = smallest_atomic_size(job)
+
+    # Collect the candidates first so that the rewrite below never mutates a basic block
+    # while it is being iterated.
+    worklist = LLVM.LoadInst[]
+    for f in functions(ir), bb in blocks(f), inst in instructions(bb)
+        if inst isa LLVM.LoadInst && is_atomic(inst)
+            typ = value_type(inst)
+            # TODO: Handle floats and other types appropriately
+            if typ isa LLVM.IntegerType && sizeof(dl, typ) < min_bytes
+                push!(worklist, inst)
+            end
+        end
+        # TODO: legalize atomic stores (`LLVM.StoreInst`) as well
+    end
+    isempty(worklist) && return ir
+
+    wide_typ = LLVM.IntType(min_bytes * 8)
+    @dispose builder = IRBuilder() begin
+        for inst in worklist
+            position!(builder, inst)
+            ptr = only(operands(inst))
+            wide = load!(builder, wide_typ, ptr)
+            # A plain `load!` is not atomic: carry over the original ordering so the
+            # replacement is an atomic load too.
+            ordering!(wide, ordering(inst))
+            # TODO: alignment, syncscope, etc.
+            # TODO: Do we need to shift the loaded value?
+            narrow = trunc!(builder, wide, value_type(inst))
+
+            replace_uses!(inst, narrow)
+            erase!(inst)
+        end
+    end
+    return ir
+end
diff --git a/src/driver.jl b/src/driver.jl
index d4c6ac70..06c4b50f 100644
--- a/src/driver.jl
+++ b/src/driver.jl
@@ -151,14 +151,9 @@ end
 
 const __llvm_initialized = Ref(false)
 
-@locked function emit_llvm(@nospecialize(job::CompilerJob); kwargs...)
-    # XXX: remove on next major version
-    if !isempty(kwargs)
-        Base.depwarn("The GPUCompiler `emit_llvm` function is an internal API. Use `GPUCompiler.compile` (with any kwargs passed to `CompilerConfig`) instead.", :emit_llvm)
-        config = CompilerConfig(job.config; kwargs...)
-        job = CompilerJob(job.source, config)
-    end
-
+# Initialize LLVM's targets on first use; later calls are no-ops because
+# `__llvm_initialized` has been set.
+function initialize_llvm()
     if !__llvm_initialized[]
         InitializeAllTargets()
         InitializeAllTargetInfos()
@@ -167,6 +162,17 @@ const __llvm_initialized = Ref(false)
         InitializeAllTargetMCs()
         __llvm_initialized[] = true
     end
+end
+
+@locked function emit_llvm(@nospecialize(job::CompilerJob); kwargs...)
+    # XXX: remove on next major version
+    if !isempty(kwargs)
+        Base.depwarn("The GPUCompiler `emit_llvm` function is an internal API. Use `GPUCompiler.compile` (with any kwargs passed to `CompilerConfig`) instead.", :emit_llvm)
+        config = CompilerConfig(job.config; kwargs...)
+        job = CompilerJob(job.source, config)
+    end
+
+    initialize_llvm()
 
     @tracepoint "IR generation" begin
         ir, compiled = irgen(job)
diff --git a/test/atomics.jl b/test/atomics.jl
new file mode 100644
index 00000000..e6557dea
--- /dev/null
+++ b/test/atomics.jl
@@ -0,0 +1,119 @@
+"""
+    run_pass(backend, pass, mod)
+
+Parse the textual LLVM IR `mod`, apply `pass(job, ir)` to it, emit assembly for the
+result with a job created by `backend.create_job`, and write that assembly to `stdout`.
+"""
+function run_pass(backend, pass, mod)
+    GPUCompiler.initialize_llvm()
+
+    fake_job, _ = backend.create_job(identity, (Int,))
+
+    # TODO: Set DL?
+    asm, _ = JuliaContext(opaque_pointers=true) do ctx
+        ir = parse(LLVM.Module, mod)
+        ir = pass(fake_job, ir)
+        GPUCompiler.emit_asm(fake_job, ir, LLVM.API.LLVMAssemblyFile)
+    end
+    write(stdout, asm)
+end
+
+@testset "PTX" begin
+    # PTX backend doesn't support atomics larger than i64
+    @test @filecheck begin
+        mod = """define void @test(ptr %a) nounwind {
+                    %1 = load atomic i128, ptr %a seq_cst, align 16
+                    store atomic i128 %1, ptr %a seq_cst, align 16
+                    ret void
+                 }
+              """
+        check"CHECK: LLVM error: Undefined external symbol \"__sync_val_compare_and_swap_16\""
+
+        run_pass(PTX, (_, ir)-> ir, mod)
+    end
+
+    # Note: Unordered gets eliminated here
+
+    @test @filecheck begin
+        mod = """define void @test(ptr %a) nounwind {
+                    %1 = load atomic i64, ptr %a monotonic, align 8
+                    store atomic i64 %1, ptr %a monotonic, align 8
+                    ret void
+                 }
+              """
+        check"CHECK: .target sm_70"
+        check"CHECK: ld.volatile.u64"
+        check"CHECK: st.volatile.u64"
+
+        run_pass(PTX, (_, ir)-> ir, mod)
+    end
+
+    # Note: PTX backend doesn't support release stores yet (instruction selection fails)
+    @test @filecheck begin
+        mod = """define void @test(ptr %a) nounwind {
+                    %1 = load atomic i64, ptr %a acquire, align 8
+                    store atomic i64 %1, ptr %a release, align 8
+                    ret void
+                 }
+              """
+        check"CHECK: LLVM error: Cannot select: 0x{{[0-9_a-z]*}}: ch = AtomicStore<(store release (s64)"
+
+        run_pass(PTX, (_, ir)-> ir, mod)
+    end
+
+    # Note: PTX backend doesn't support seq_cst yet
+    @test @filecheck begin
+        mod = """define void @test(ptr %a) nounwind {
+                    %1 = load atomic i64, ptr %a seq_cst, align 8
+                    store atomic i64 %1, ptr %a seq_cst, align 8
+                    ret void
+                 }
+              """
+        check"CHECK: LLVM error: Cannot select: 0x{{[0-9_a-z]*}}: ch = AtomicStore<(store seq_cst (s64)"
+
+        run_pass(PTX, (_, ir)-> ir, mod)
+    end
+
+    @test @filecheck begin
+        mod = """define void @test(ptr %a) nounwind {
+                    %1 = load atomic i32, ptr %a monotonic, align 4
+                    store atomic i32 %1, ptr %a monotonic, align 4
+                    ret void
+                 }
+              """
+        check"CHECK: .target sm_70"
+        check"CHECK: ld.volatile.u32"
+        check"CHECK: st.volatile.u32"
+
+        run_pass(PTX, (_, ir)-> ir, mod)
+    end
+
+    @test @filecheck begin
+        mod = """define void @test(ptr %a) nounwind {
+                    %1 = load atomic i16, ptr %a monotonic, align 2
+                    store atomic i16 %1, ptr %a monotonic, align 2
+                    ret void
+                 }
+              """
+        check"CHECK: .target sm_70"
+        check"CHECK: ld.volatile.u16"
+        check"CHECK: st.volatile.u16"
+
+        run_pass(PTX, (_, ir)-> ir, mod)
+    end
+
+    @test @filecheck begin
+        mod = """define void @test(ptr %a) nounwind {
+                    %1 = load atomic i8, ptr %a monotonic, align 1
+                    store atomic i8 %1, ptr %a monotonic, align 1
+                    ret void
+                 }
+              """
+        check"CHECK: .target sm_70"
+        check"CHECK: ld.volatile.u8"
+        check"CHECK: st.volatile.u8"
+
+        run_pass(PTX, (_, ir)-> ir, mod)
+    end
+
+end # PTX