Skip to content

Work towards atomic legalization #701

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions src/atomic_legalization.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
"""
    smallest_atomic_size(job)

Return the smallest atomic access size, in bytes.

The value is currently a fixed `4`; `job` is accepted but not inspected.
"""
smallest_atomic_size(job) = 4

# Planned legalization steps:
# 1. Legalize sizes
# 2. Legalize ordering through fences
# 3. Legalize operations through compare-and-swap (cmpxchg)

"""
    legalize_atomics!(job, ir)

Widen undersized atomic loads in the module `ir`, in place, and return `ir`.

Every atomic `LLVM.LoadInst` whose type is smaller (per the module's data layout)
than `smallest_atomic_size(job)` bytes is replaced by a load of an integer that is
`smallest_atomic_size(job)` bytes wide from the same pointer, followed by a
truncation back to the original type. All uses of the original load are redirected
to the truncated value and the original instruction is erased.

Atomic stores are not rewritten yet. Alignment, ordering, non-integer types and
any shifting of the loaded value are still open (see the TODOs in the body).
"""
function legalize_atomics!(job, ir)
    layout = datalayout(ir)
    for fn in functions(ir)
        for block in blocks(fn)
            for inst in instructions(block)
                # Only atomic loads are rewritten; atomic stores are not handled yet.
                (inst isa LLVM.LoadInst && is_atomic(inst)) || continue

                narrow_type = value_type(inst)
                min_bytes = smallest_atomic_size(job)
                # Loads that are already wide enough are left alone.
                sizeof(layout, narrow_type) < min_bytes || continue

                # Replace with a larger atomic type
                @dispose builder = IRBuilder() begin
                    # Emit the replacement right where the original load sits.
                    position!(builder, inst)
                    address = only(operands(inst))
                    wide_type = LLVM.IntType(min_bytes * 8)
                    wide_load = load!(builder, wide_type, address)
                    # TODO: alignment, ordering, etc.
                    # TODO: Handle floats and other types appropriately
                    # TODO: Do we need to shift the loaded value?
                    narrowed = trunc!(builder, wide_load, narrow_type)

                    replace_uses!(inst, narrowed)
                    erase!(inst)
                end
            end
        end
    end
    return ir
end
20 changes: 12 additions & 8 deletions src/driver.jl
Original file line number Diff line number Diff line change
Expand Up @@ -151,14 +151,7 @@ end

const __llvm_initialized = Ref(false)

@locked function emit_llvm(@nospecialize(job::CompilerJob); kwargs...)
# XXX: remove on next major version
if !isempty(kwargs)
Base.depwarn("The GPUCompiler `emit_llvm` function is an internal API. Use `GPUCompiler.compile` (with any kwargs passed to `CompilerConfig`) instead.", :emit_llvm)
config = CompilerConfig(job.config; kwargs...)
job = CompilerJob(job.source, config)
end

function initialize_llvm()
if !__llvm_initialized[]
InitializeAllTargets()
InitializeAllTargetInfos()
Expand All @@ -167,6 +160,17 @@ const __llvm_initialized = Ref(false)
InitializeAllTargetMCs()
__llvm_initialized[] = true
end
end

@locked function emit_llvm(@nospecialize(job::CompilerJob); kwargs...)
# XXX: remove on next major version
if !isempty(kwargs)
Base.depwarn("The GPUCompiler `emit_llvm` function is an internal API. Use `GPUCompiler.compile` (with any kwargs passed to `CompilerConfig`) instead.", :emit_llvm)
config = CompilerConfig(job.config; kwargs...)
job = CompilerJob(job.source, config)
end

initialize_llvm()

@tracepoint "IR generation" begin
ir, compiled = irgen(job)
Expand Down
113 changes: 113 additions & 0 deletions test/atomics.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
"""
    run_pass(backend, pass, mod)

Parse the textual LLVM IR `mod`, run `pass(job, ir)` on it, emit assembly for the
result and write that assembly to `stdout`.

`backend` must provide `create_job`; the job is created for `identity` with an
`(Int,)` signature and is only used to drive `pass` and `GPUCompiler.emit_asm`.
Returns whatever `write(stdout, asm)` returns.
"""
function run_pass(backend, pass, mod)
    GPUCompiler.initialize_llvm()

    # Only the first element returned by `create_job` is needed here.
    job, _ = backend.create_job(identity, (Int,))

    # TODO: Set DL?
    asm, _ = JuliaContext(opaque_pointers=true) do _ctx
        parsed = parse(LLVM.Module, mod)
        transformed = pass(job, parsed)
        GPUCompiler.emit_asm(job, transformed, LLVM.API.LLVMAssemblyFile)
    end
    return write(stdout, asm)
end

# Atomic load/store code generation checks for the PTX backend.
#
# Every case hands a small LLVM IR module to `run_pass` together with an identity
# pass (`(_, ir)-> ir`), so the IR is emitted unmodified, and lists `CHECK`
# patterns for the text that `run_pass` prints.
# NOTE(review): presumably `@filecheck` matches the `check"..."` patterns against
# the captured output, including LLVM error text -- confirm against the helper.
@testset "PTX" begin
# PTX backend doesn't support larger than i64 atomics
# (a seq_cst i128 load/store is expected to fail on a missing 16-byte
# compare-and-swap symbol)
@test @filecheck begin
mod = """define void @test(ptr %a) nounwind {
%1 = load atomic i128, ptr %a seq_cst, align 16
store atomic i128 %1, ptr %a seq_cst, align 16
ret void
}
"""
check"CHECK: LLVM error: Undefined external symbol \"__sync_val_compare_and_swap_16\""

run_pass(PTX, (_, ir)-> ir, mod)
end

# Note: Unordered gets eliminated here

# Monotonic i64 load/store: expected to show up as volatile 64-bit ld/st.
@test @filecheck begin
mod = """define void @test(ptr %a) nounwind {
%1 = load atomic i64, ptr %a monotonic, align 8
store atomic i64 %1, ptr %a monotonic, align 8
ret void
}
"""
check"CHECK: .target sm_70"
check"CHECK: ld.volatile.u64"
check"CHECK: st.volatile.u64"

run_pass(PTX, (_, ir)-> ir, mod)
end

# Note: PTX backend doesn't support atomic stores with release ordering yet
# (an acquire load followed by a release store is expected to fail instruction
# selection on the store)
@test @filecheck begin
mod = """define void @test(ptr %a) nounwind {
%1 = load atomic i64, ptr %a acquire, align 8
store atomic i64 %1, ptr %a release, align 8
ret void
}
"""
check"CHECK: LLVM error: Cannot select: 0x{{[0-9_a-z]*}}: ch = AtomicStore<(store release (s64)"

run_pass(PTX, (_, ir)-> ir, mod)
end

# Note: PTX backend doesn't support seq_cst yet
# (the seq_cst store is expected to fail instruction selection)
@test @filecheck begin
mod = """define void @test(ptr %a) nounwind {
%1 = load atomic i64, ptr %a seq_cst, align 8
store atomic i64 %1, ptr %a seq_cst, align 8
ret void
}
"""
check"CHECK: LLVM error: Cannot select: 0x{{[0-9_a-z]*}}: ch = AtomicStore<(store seq_cst (s64)"

run_pass(PTX, (_, ir)-> ir, mod)
end

# Monotonic i32 load/store: expected to show up as volatile 32-bit ld/st.
@test @filecheck begin
mod = """define void @test(ptr %a) nounwind {
%1 = load atomic i32, ptr %a monotonic, align 4
store atomic i32 %1, ptr %a monotonic, align 4
ret void
}
"""
check"CHECK: .target sm_70"
check"CHECK: ld.volatile.u32"
check"CHECK: st.volatile.u32"

run_pass(PTX, (_, ir)-> ir, mod)
end

# Monotonic i16 load/store: expected to show up as volatile 16-bit ld/st.
@test @filecheck begin
mod = """define void @test(ptr %a) nounwind {
%1 = load atomic i16, ptr %a monotonic, align 2
store atomic i16 %1, ptr %a monotonic, align 2
ret void
}
"""
check"CHECK: .target sm_70"
check"CHECK: ld.volatile.u16"
check"CHECK: st.volatile.u16"

run_pass(PTX, (_, ir)-> ir, mod)
end

# Monotonic i8 load/store: expected to show up as volatile 8-bit ld/st.
@test @filecheck begin
mod = """define void @test(ptr %a) nounwind {
%1 = load atomic i8, ptr %a monotonic, align 1
store atomic i8 %1, ptr %a monotonic, align 1
ret void
}
"""
check"CHECK: .target sm_70"
check"CHECK: ld.volatile.u8"
check"CHECK: st.volatile.u8"

run_pass(PTX, (_, ir)-> ir, mod)
end

end # PTX
Loading