From 6647cef49f9c52b30fc560aafe298095f5a23aab Mon Sep 17 00:00:00 2001
From: Valentin Churavy
Date: Tue, 17 Jun 2025 08:13:17 +0200
Subject: [PATCH 1/3] Attempt atomic legalization

---
Review notes (this section is ignored when the patch is applied):
- `legalize_atomics!` only rewrites atomic loads whose type is narrower than
  `smallest_atomic_size(job)`, which is hard-coded to 4 bytes; the atomic-store
  branch is still empty.
- The replacement `load!` is created without copying the ordering or alignment
  of the original instruction, and the wide value is only truncated, not
  shifted (see the TODOs in the hunk).
- `atomics.jl` is a scratch driver; it is deleted again in PATCH 3/3.

 atomics.jl                 | 64 ++++++++++++++++++++++++++++++++++++++
 src/atomic_legalization.jl | 34 ++++++++++++++++++++
 2 files changed, 98 insertions(+)
 create mode 100644 atomics.jl
 create mode 100644 src/atomic_legalization.jl

diff --git a/atomics.jl b/atomics.jl
new file mode 100644
index 00000000..5eac7cc2
--- /dev/null
+++ b/atomics.jl
@@ -0,0 +1,64 @@
+using GPUCompiler
+using LLVM
+
+function initialize()
+    if !GPUCompiler.__llvm_initialized[]
+        InitializeAllTargets()
+        InitializeAllTargetInfos()
+        InitializeAllAsmPrinters()
+        InitializeAllAsmParsers()
+        InitializeAllTargetMCs()
+        GPUCompiler.__llvm_initialized[] = true
+    end
+end
+
+# include all helpers
+include(joinpath("test", "helpers", "runtime.jl"))
+include(joinpath("test", "helpers", "ptx.jl"))
+
+job, _ = PTX.create_job(identity, (Int,))
+
+includet("src/atomic_legalization.jl")
+
+# mod = """
+# define void @test(ptr %a) nounwind {
+#   %1 = load atomic i128, ptr %a seq_cst, align 16
+#   store atomic i128 %1, ptr %a seq_cst, align 16
+#   ret void
+# }
+# """
+# => __sync_val_compare_and_swap_16
+
+
+# mod = """
+# define void @test(ptr %a) nounwind {
+#   %1 = load atomic i8, ptr %a seq_cst, align 16
+#   store atomic i8 %1, ptr %a seq_cst, align 16
+#   ret void
+# }
+# """
+
+# Cannot select: 0x67a0660: ch = AtomicStore<(store seq_cst (s8) into %ir.a, align 16)> 0x67a05f0:1, 0x67a0580, 0x67a05f0
+#   0x67a0580: i64,ch = load<(dereferenceable invariant load (s64) from `ptr addrspace(101) null`, addrspace 101)> 0x7125d30, TargetExternalSymbol:i64'test_param_0', undef:i64
+#     0x67a0200: i64 = TargetExternalSymbol'test_param_0'
+#     0x67a02e0: i64 = undef
+#   0x67a05f0: i16,ch = AtomicLoad<(load seq_cst (s8) from %ir.a, align 16)> 0x7125d30, 0x67a0580
+#     0x67a0580: i64,ch = load<(dereferenceable invariant load (s64) from `ptr addrspace(101) null`, addrspace 101)> 0x7125d30, TargetExternalSymbol:i64'test_param_0', undef:i64
+#       0x67a0200: i64 = TargetExternalSymbol'test_param_0'
+#       0x67a02e0: i64 = undef
+
+mod = """
+define i8 @test(ptr %a) nounwind {
+  %1 = load atomic i8, ptr %a seq_cst, align 16
+  ret i8 %1
+}
+"""
+
+asm, meta = JuliaContext(opaque_pointers=true) do ctx
+    initialize()
+    ir = parse(LLVM.Module, mod)
+    ir = legalize_atomics!(job, ir)
+    GPUCompiler.emit_asm(job, ir, LLVM.API.LLVMAssemblyFile)
+end
+
+
diff --git a/src/atomic_legalization.jl b/src/atomic_legalization.jl
new file mode 100644
index 00000000..49f89a57
--- /dev/null
+++ b/src/atomic_legalization.jl
@@ -0,0 +1,34 @@
+# in bytes
+function smallest_atomic_size(job)
+    return 4
+end
+
+# 1. Legalize sizes
+# 2. Legalize ordering through fences
+# 3. Legalize operations through cmpswp
+
+function legalize_atomics!(job, ir)
+    dl = datalayout(ir)
+    for f in functions(ir), bb in blocks(f), inst in instructions(bb)
+        if inst isa LLVM.LoadInst && is_atomic(inst)
+            typ = value_type(inst)
+            if sizeof(dl, typ) < smallest_atomic_size(job)
+                # Replace with a larger atomic type
+                @dispose builder = IRBuilder() begin
+                    position!(builder, inst)
+                    ptr = only(operands(inst))
+                    load = load!(builder, LLVM.IntType(smallest_atomic_size(job) * 8), ptr)
+                    # TODO: alignment, ordering, etc.
+                    # TODO: Handle floats and other types appropriately
+                    # TODO: Do we need to shift the loaded value?
+                    new_inst = trunc!(builder, load, typ)
+
+                    replace_uses!(inst, new_inst)
+                    erase!(inst)
+                end
+            end
+        elseif inst isa LLVM.StoreInst && is_atomic(inst)
+        end
+    end
+    return ir
+end
From 510d6c7819668199928aa527e1b9b85157dca89e Mon Sep 17 00:00:00 2001
From: Valentin Churavy
Date: Wed, 25 Jun 2025 10:53:18 +0200
Subject: [PATCH 2/3] add some PTX tests for load/store ordering

---
Review notes (this section is ignored when the patch is applied):
- The LLVM target initialization is moved out of `emit_llvm` into a new
  `initialize_llvm()` function, which `emit_llvm` now calls and which the new
  `run_pass` test helper calls directly.
- Every test passes the identity pass `(_, ir)-> ir` to `run_pass`, so
  `legalize_atomics!` is not exercised by these tests.

 src/driver.jl   |  20 +++++----
 test/atomics.jl | 113 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 125 insertions(+), 8 deletions(-)
 create mode 100644 test/atomics.jl

diff --git a/src/driver.jl b/src/driver.jl
index d4c6ac70..06c4b50f 100644
--- a/src/driver.jl
+++ b/src/driver.jl
@@ -151,14 +151,7 @@ end
 
 const __llvm_initialized = Ref(false)
 
-@locked function emit_llvm(@nospecialize(job::CompilerJob); kwargs...)
-    # XXX: remove on next major version
-    if !isempty(kwargs)
-        Base.depwarn("The GPUCompiler `emit_llvm` function is an internal API. Use `GPUCompiler.compile` (with any kwargs passed to `CompilerConfig`) instead.", :emit_llvm)
-        config = CompilerConfig(job.config; kwargs...)
-        job = CompilerJob(job.source, config)
-    end
-
+function initialize_llvm()
     if !__llvm_initialized[]
         InitializeAllTargets()
         InitializeAllTargetInfos()
@@ -167,6 +160,17 @@ const __llvm_initialized = Ref(false)
         InitializeAllTargetMCs()
         __llvm_initialized[] = true
     end
+end
+
+@locked function emit_llvm(@nospecialize(job::CompilerJob); kwargs...)
+    # XXX: remove on next major version
+    if !isempty(kwargs)
+        Base.depwarn("The GPUCompiler `emit_llvm` function is an internal API. Use `GPUCompiler.compile` (with any kwargs passed to `CompilerConfig`) instead.", :emit_llvm)
+        config = CompilerConfig(job.config; kwargs...)
+        job = CompilerJob(job.source, config)
+    end
+
+    initialize_llvm()
 
     @tracepoint "IR generation" begin
         ir, compiled = irgen(job)
diff --git a/test/atomics.jl b/test/atomics.jl
new file mode 100644
index 00000000..e6557dea
--- /dev/null
+++ b/test/atomics.jl
@@ -0,0 +1,113 @@
+function run_pass(backend, pass, mod)
+    GPUCompiler.initialize_llvm()
+
+    fake_job, _ = backend.create_job(identity, (Int,))
+
+    # TODO: Set DL?
+    asm, meta = JuliaContext(opaque_pointers=true) do ctx
+        ir = parse(LLVM.Module, mod)
+        ir = pass(fake_job, ir)
+        GPUCompiler.emit_asm(fake_job, ir, LLVM.API.LLVMAssemblyFile)
+    end
+    write(stdout, asm)
+end
+
+@testset "PTX" begin
+    # PTX backend doesn't support larger than i64 atomics
+    @test @filecheck begin
+        mod = """define void @test(ptr %a) nounwind {
+            %1 = load atomic i128, ptr %a seq_cst, align 16
+            store atomic i128 %1, ptr %a seq_cst, align 16
+            ret void
+        }
+        """
+        check"CHECK: LLVM error: Undefined external symbol \"__sync_val_compare_and_swap_16\""
+
+        run_pass(PTX, (_, ir)-> ir, mod)
+    end
+
+    # Note: Unordered gets eliminated here
+
+    @test @filecheck begin
+        mod = """define void @test(ptr %a) nounwind {
+            %1 = load atomic i64, ptr %a monotonic, align 8
+            store atomic i64 %1, ptr %a monotonic, align 8
+            ret void
+        }
+        """
+        check"CHECK: .target sm_70"
+        check"CHECK: ld.volatile.u64"
+        check"CHECK: st.volatile.u64"
+
+        run_pass(PTX, (_, ir)-> ir, mod)
+    end
+
+    # Note: PTX backend doesn't support store/release yet
+    @test @filecheck begin
+        mod = """define void @test(ptr %a) nounwind {
+            %1 = load atomic i64, ptr %a acquire, align 8
+            store atomic i64 %1, ptr %a release, align 8
+            ret void
+        }
+        """
+        check"CHECK: LLVM error: Cannot select: 0x{{[0-9_a-z]*}}: ch = AtomicStore<(store release (s64)"
+
+        run_pass(PTX, (_, ir)-> ir, mod)
+    end
+
+    # Note: PTX backend doesn't support seq_cst yet
+    @test @filecheck begin
+        mod = """define void @test(ptr %a) nounwind {
+            %1 = load atomic i64, ptr %a seq_cst, align 8
+            store atomic i64 %1, ptr %a seq_cst, align 8
+            ret void
+        }
+        """
+        check"CHECK: LLVM error: Cannot select: 0x{{[0-9_a-z]*}}: ch = AtomicStore<(store seq_cst (s64)"
+
+        run_pass(PTX, (_, ir)-> ir, mod)
+    end
+
+    @test @filecheck begin
+        mod = """define void @test(ptr %a) nounwind {
+            %1 = load atomic i32, ptr %a monotonic, align 4
+            store atomic i32 %1, ptr %a monotonic, align 4
+            ret void
+        }
+        """
+        check"CHECK: .target sm_70"
+        check"CHECK: ld.volatile.u32"
+        check"CHECK: st.volatile.u32"
+
+        run_pass(PTX, (_, ir)-> ir, mod)
+    end
+
+    @test @filecheck begin
+        mod = """define void @test(ptr %a) nounwind {
+            %1 = load atomic i16, ptr %a monotonic, align 2
+            store atomic i16 %1, ptr %a monotonic, align 2
+            ret void
+        }
+        """
+        check"CHECK: .target sm_70"
+        check"CHECK: ld.volatile.u16"
+        check"CHECK: st.volatile.u16"
+
+        run_pass(PTX, (_, ir)-> ir, mod)
+    end
+
+    @test @filecheck begin
+        mod = """define void @test(ptr %a) nounwind {
+            %1 = load atomic i8, ptr %a monotonic, align 1
+            store atomic i8 %1, ptr %a monotonic, align 1
+            ret void
+        }
+        """
+        check"CHECK: .target sm_70"
+        check"CHECK: ld.volatile.u8"
+        check"CHECK: st.volatile.u8"
+
+        run_pass(PTX, (_, ir)-> ir, mod)
+    end
+
+end # PTX
\ No newline at end of file
From c6fbdb53fda69a29ab547007f16e46d1bd7500fe Mon Sep 17 00:00:00 2001
From: Valentin Churavy
Date: Wed, 25 Jun 2025 10:55:49 +0200
Subject: [PATCH 3/3] remove old file

---
 atomics.jl | 64 ------------------------------------------------------
 1 file changed, 64 deletions(-)
 delete mode 100644 atomics.jl

diff --git a/atomics.jl b/atomics.jl
deleted file mode 100644
index 5eac7cc2..00000000
--- a/atomics.jl
+++ /dev/null
@@ -1,64 +0,0 @@
-using GPUCompiler
-using LLVM
-
-function initialize()
-    if !GPUCompiler.__llvm_initialized[]
-        InitializeAllTargets()
-        InitializeAllTargetInfos()
-        InitializeAllAsmPrinters()
-        InitializeAllAsmParsers()
-        InitializeAllTargetMCs()
-        GPUCompiler.__llvm_initialized[] = true
-    end
-end
-
-# include all helpers
-include(joinpath("test", "helpers", "runtime.jl"))
-include(joinpath("test", "helpers", "ptx.jl"))
-
-job, _ = PTX.create_job(identity, (Int,))
-
-includet("src/atomic_legalization.jl")
-
-# mod = """
-# define void @test(ptr %a) nounwind {
-#   %1 = load atomic i128, ptr %a seq_cst, align 16
-#   store atomic i128 %1, ptr %a seq_cst, align 16
-#   ret void
-# }
-# """
-# => __sync_val_compare_and_swap_16
-
-
-# mod = """
-# define void @test(ptr %a) nounwind {
-#   %1 = load atomic i8, ptr %a seq_cst, align 16
-#   store atomic i8 %1, ptr %a seq_cst, align 16
-#   ret void
-# }
-# """
-
-# Cannot select: 0x67a0660: ch = AtomicStore<(store seq_cst (s8) into %ir.a, align 16)> 0x67a05f0:1, 0x67a0580, 0x67a05f0
-#   0x67a0580: i64,ch = load<(dereferenceable invariant load (s64) from `ptr addrspace(101) null`, addrspace 101)> 0x7125d30, TargetExternalSymbol:i64'test_param_0', undef:i64
-#     0x67a0200: i64 = TargetExternalSymbol'test_param_0'
-#     0x67a02e0: i64 = undef
-#   0x67a05f0: i16,ch = AtomicLoad<(load seq_cst (s8) from %ir.a, align 16)> 0x7125d30, 0x67a0580
-#     0x67a0580: i64,ch = load<(dereferenceable invariant load (s64) from `ptr addrspace(101) null`, addrspace 101)> 0x7125d30, TargetExternalSymbol:i64'test_param_0', undef:i64
-#       0x67a0200: i64 = TargetExternalSymbol'test_param_0'
-#       0x67a02e0: i64 = undef
-
-mod = """
-define i8 @test(ptr %a) nounwind {
-  %1 = load atomic i8, ptr %a seq_cst, align 16
-  ret i8 %1
-}
-"""
-
-asm, meta = JuliaContext(opaque_pointers=true) do ctx
-    initialize()
-    ir = parse(LLVM.Module, mod)
-    ir = legalize_atomics!(job, ir)
-    GPUCompiler.emit_asm(job, ir, LLVM.API.LLVMAssemblyFile)
-end
-
-