Skip to content

Commit b725007

Browse files
Support 64-bit atomics (#347)
Co-authored-by: Tim Besard <[email protected]>
1 parent e54fc8a commit b725007

File tree

5 files changed

+40
-14
lines changed

5 files changed

+40
-14
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ Preferences = "1"
3131
Printf = "1"
3232
Random = "1"
3333
Reexport = "1"
34-
SPIRVIntrinsics = "0.4"
34+
SPIRVIntrinsics = "0.5"
3535
SPIRV_LLVM_Backend_jll = "20"
3636
SPIRV_Tools_jll = "2025.1"
3737
StaticArrays = "1"

lib/intrinsics/Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "SPIRVIntrinsics"
22
uuid = "71d1d633-e7e8-4a92-83a1-de8814b09ba8"
33
authors = ["Tim Besard <[email protected]>"]
4-
version = "0.4.0"
4+
version = "0.5.0"
55

66
[deps]
77
ExprTools = "e2ba6199-217a-4e67-a87a-7c52f15ade04"

lib/intrinsics/src/atomic.jl

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,9 @@
11
# Atomic Functions
22

3-
# TODO: support for 64-bit atomics via atom_cmpxchg (from cl_khr_int64_base_atomics)
3+
# provides atomic functions that rely on the OpenCL base atomics, as well as the
4+
# cl_khr_int64_base_atomics and cl_khr_int64_extended_atomics extensions.
45

5-
# "atomic operations on 32-bit signed, unsigned integers and single precision
6-
# floating-point to locations in __global or __local memory"
7-
8-
const atomic_integer_types = [UInt32, Int32]
9-
# TODO: 64-bit atomics with ZE_DEVICE_MODULE_FLAG_INT64_ATOMICS
10-
# TODO: additional floating-point atomics with ZE_extension_float_atomics
6+
const atomic_integer_types = [UInt32, Int32, UInt64, Int64]
117
const atomic_memory_types = [AS.Workgroup, AS.CrossWorkgroup]
128

139

@@ -67,15 +63,23 @@ end
6763
for as in atomic_memory_types
6864
@eval begin
6965

66+
# There is native support for atomic_xchg on Float32, but not for Float64,
67+
# so we always reinterpret for consistency.
7068
@device_function atomic_xchg!(p::LLVMPtr{Float32,$as}, val::Float32) =
71-
@builtin_ccall("atomic_xchg", Float32, (LLVMPtr{Float32,$as}, Float32,), p, val)
69+
reinterpret(Float32, atomic_xchg!(reinterpret(LLVMPtr{UInt32,$as}, p),
70+
reinterpret(UInt32, val)))
71+
@device_function atomic_xchg!(p::LLVMPtr{Float64,$as}, val::Float64) =
72+
reinterpret(Float64, atomic_xchg!(reinterpret(LLVMPtr{UInt64,$as}, p),
73+
reinterpret(UInt64, val)))
7274

73-
# XXX: why is only xchg supported on floats? isn't it safe for cmpxchg too,
74-
# which should only perform bitwise comparisons?
7575
@device_function atomic_cmpxchg!(p::LLVMPtr{Float32,$as}, cmp::Float32, val::Float32) =
7676
reinterpret(Float32, atomic_cmpxchg!(reinterpret(LLVMPtr{UInt32,$as}, p),
7777
reinterpret(UInt32, cmp),
7878
reinterpret(UInt32, val)))
79+
@device_function atomic_cmpxchg!(p::LLVMPtr{Float64,$as}, cmp::Float64, val::Float64) =
80+
reinterpret(Float64, atomic_cmpxchg!(reinterpret(LLVMPtr{UInt64,$as}, p),
81+
reinterpret(UInt64, cmp),
82+
reinterpret(UInt64, val)))
7983

8084
end
8185
end
@@ -239,6 +243,11 @@ end
239243
atomic_arrayset(A, Base._to_linear_index(A, Is...), op, convert(T, val))
240244

241245
# native atomics
246+
# TODO: support inc/dec
247+
# TODO: this depends on available extensions
248+
# - Int64/UInt64: requires cl_khr_int64_base_atomics for add/sub/inc/dec,
249+
# requires cl_khr_int64_extended_atomics for min/max/and/or/xor
250+
# - Float64: always should hit the fallback
242251
for (op,impl) in [(+) => atomic_add!,
243252
(-) => atomic_sub!,
244253
(&) => atomic_and!,
@@ -247,11 +256,12 @@ for (op,impl) in [(+) => atomic_add!,
247256
Base.max => atomic_max!,
248257
Base.min => atomic_min!]
249258
@eval @inline atomic_arrayset(A::AbstractArray{T}, I::Integer, ::typeof($op),
250-
val::T) where {T <: Union{Int32,UInt32}} =
259+
val::T) where {T <: Union{atomic_integer_types...}} =
251260
$impl(pointer(A, I), val)
252261
end
253262

254263
# fallback using compare-and-swap
264+
# TODO: for 64-bit types, this depends on cl_khr_int64_base_atomics
255265
function atomic_arrayset(A::AbstractArray{T}, I::Integer, op::Function, val) where {T}
256266
ptr = pointer(A, I)
257267
old = Base.unsafe_load(ptr, 1)

test/atomics.jl

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
@testset "atomics" begin
2+
3+
function atomic_count(counter)
4+
OpenCL.@atomic counter[] += 1
5+
return
6+
end
7+
8+
@testset "atomic_add! ($T)" for T in [Int32, UInt32, Int64, UInt64]
9+
if sizeof(T) == 4 || "cl_khr_int64_extended_atomics" in cl.device().extensions
10+
a = OpenCL.zeros(T)
11+
@opencl global_size=1000 atomic_count(a)
12+
@test OpenCL.@allowscalar a[] == 1000
13+
end
14+
end
15+
16+
end

test/setup.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ function runtests(f, name, platform_filter)
9090
end
9191

9292
# some tests require native execution capabilities
93-
requires_il = name in ["execution", "intrinsics", "kernelabstractions"] ||
93+
requires_il = name in ["atomics", "execution", "intrinsics", "kernelabstractions"] ||
9494
startswith(name, "gpuarrays/")
9595

9696
ex = quote

0 commit comments

Comments
 (0)