-
Notifications
You must be signed in to change notification settings - Fork 46
Add atomic float support #399
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
Your PR requires formatting changes to meet the project's style guidelines. Click here to view the suggested changes.diff --git a/lib/intrinsics/src/atomic.jl b/lib/intrinsics/src/atomic.jl
index a93f926..d627741 100644
--- a/lib/intrinsics/src/atomic.jl
+++ b/lib/intrinsics/src/atomic.jl
@@ -47,17 +47,21 @@ for gentype in atomic_types, as in atomic_memory_types
@device_function atomic_xor!(p::LLVMPtr{$gentype,$as}, val::$gentype) =
@builtin_ccall("atomic_xor", $gentype,
(LLVMPtr{$gentype,$as}, $gentype), p, val)
-end
-if gentype in atomic_integer_types
- @eval begin
- @device_function atomic_xchg!(p::LLVMPtr{$gentype,$as}, val::$gentype) =
- @builtin_ccall("atomic_xchg", $gentype,
- (LLVMPtr{$gentype,$as}, $gentype), p, val)
-
- @device_function atomic_cmpxchg!(p::LLVMPtr{$gentype,$as}, cmp::$gentype, val::$gentype) =
- @builtin_ccall("atomic_cmpxchg", $gentype,
- (LLVMPtr{$gentype,$as}, $gentype, $gentype), p, cmp, val)
end
+ if gentype in atomic_integer_types
+ @eval begin
+ @device_function atomic_xchg!(p::LLVMPtr{$gentype, $as}, val::$gentype) =
+ @builtin_ccall(
+ "atomic_xchg", $gentype,
+ (LLVMPtr{$gentype, $as}, $gentype), p, val
+ )
+
+ @device_function atomic_cmpxchg!(p::LLVMPtr{$gentype, $as}, cmp::$gentype, val::$gentype) =
+ @builtin_ccall(
+ "atomic_cmpxchg", $gentype,
+ (LLVMPtr{$gentype, $as}, $gentype, $gentype), p, cmp, val
+ )
+ end
end
end
diff --git a/test/atomics.jl b/test/atomics.jl
index 71fefa8..9a7799e 100644
--- a/test/atomics.jl
+++ b/test/atomics.jl
@@ -8,43 +8,43 @@ all_types = vcat(integer_types, float_types)
dev = OpenCL.cl.device()
# Arithmetic operations
-function test_atomic_add(counter::AbstractArray{T}) where T
+function test_atomic_add(counter::AbstractArray{T}) where {T}
OpenCL.@atomic counter[] += one(T)
return
end
-function test_atomic_sub(counter::AbstractArray{T}) where T
+function test_atomic_sub(counter::AbstractArray{T}) where {T}
OpenCL.@atomic counter[] -= one(T)
return
end
# Bitwise operations
-function test_atomic_and(counter::AbstractArray{T}) where T
+function test_atomic_and(counter::AbstractArray{T}) where {T}
OpenCL.@atomic counter[] &= ~(one(T) << (get_global_id() - 1))
return
end
-function test_atomic_or(counter::AbstractArray{T}) where T
+function test_atomic_or(counter::AbstractArray{T}) where {T}
OpenCL.@atomic counter[] |= one(T) << (get_global_id() - 1)
return
end
-function test_atomic_xor(counter::AbstractArray{T}) where T
+function test_atomic_xor(counter::AbstractArray{T}) where {T}
OpenCL.@atomic counter[] ⊻= one(T) << ((get_global_id() - 1) % 32)
return
end
# Min/max operations - use low-level API directly
-function test_atomic_max(counter::AbstractArray{T}) where T
+function test_atomic_max(counter::AbstractArray{T}) where {T}
OpenCL.atomic_max!(pointer(counter), T(get_global_id()))
return
end
-function test_atomic_min(counter::AbstractArray{T}) where T
+function test_atomic_min(counter::AbstractArray{T}) where {T}
OpenCL.atomic_min!(pointer(counter), T(get_global_id()))
return
end
# Exchange operation - use low-level API directly
-function test_atomic_xchg(counter::AbstractArray{T}) where T
+function test_atomic_xchg(counter::AbstractArray{T}) where {T}
OpenCL.atomic_xchg!(pointer(counter), one(T))
return
end
# Compare-and-swap operation - use low-level API directly
-function test_atomic_cas(counter::AbstractArray{T}) where T
+function test_atomic_cas(counter::AbstractArray{T}) where {T}
OpenCL.atomic_cmpxchg!(pointer(counter), zero(T), one(T))
return
end
@@ -63,61 +63,61 @@ atomic_operations = [
(test_atomic_cas, 0, 1),
]
@testset "atomics" begin
-@testset "$kernel_func - $T" for (kernel_func, init_val, expected_val) in atomic_operations, T in all_types
- # Skip Int64/UInt64 if not supported
- if sizeof(T) == 8 && T <: Integer && !("cl_khr_int64_extended_atomics" in dev.extensions)
- continue
- end
+ @testset "$kernel_func - $T" for (kernel_func, init_val, expected_val) in atomic_operations, T in all_types
+ # Skip Int64/UInt64 if not supported
+ if sizeof(T) == 8 && T <: Integer && !("cl_khr_int64_extended_atomics" in dev.extensions)
+ continue
+ end
- # Skip Float64 if not supported
- if T == Float64 && !("cl_khr_fp64" in dev.extensions)
- continue
- end
+ # Skip Float64 if not supported
+ if T == Float64 && !("cl_khr_fp64" in dev.extensions)
+ continue
+ end
- # Bitwise operations (only valid for integers)
- if kernel_func in [test_atomic_and, test_atomic_or, test_atomic_xor] && T <: AbstractFloat
- continue
+ # Bitwise operations (only valid for integers)
+ if kernel_func in [test_atomic_and, test_atomic_or, test_atomic_xor] && T <: AbstractFloat
+ continue
end
- # Min/max operations (only supported for 32-bit integers in OpenCL)
- if kernel_func in [test_atomic_min, test_atomic_max] && !(T in [Int32, UInt32])
- continue
- end
+ # Min/max operations (only supported for 32-bit integers in OpenCL)
+ if kernel_func in [test_atomic_min, test_atomic_max] && !(T in [Int32, UInt32])
+ continue
+ end
- if T <: Integer
- init_val %= T
- expected_val %= T
- end
+ if T <: Integer
+ init_val %= T
+ expected_val %= T
+ end
- a = OpenCL.fill(T(init_val))
- @opencl global_size=1000 kernel_func(a)
- result_val = OpenCL.@allowscalar a[]
- @test result_val === T(expected_val)
+ a = OpenCL.fill(T(init_val))
+ @opencl global_size = 1000 kernel_func(a)
+ result_val = OpenCL.@allowscalar a[]
+ @test result_val === T(expected_val)
end
-@testset "atomic_add! ($T)" for T in [Float32, Float64]
- # Float64 requires cl_khr_fp64 extension
- if T == Float64 && !("cl_khr_fp64" in cl.device().extensions)
- continue
- end
+ @testset "atomic_add! ($T)" for T in [Float32, Float64]
+ # Float64 requires cl_khr_fp64 extension
+ if T == Float64 && !("cl_khr_fp64" in cl.device().extensions)
+ continue
+ end
if "cl_ext_float_atomics" in cl.device().extensions
- @eval function atomic_float_add(counter, val::$T)
+ @eval function atomic_float_add(counter, val::$T)
@builtin_ccall(
- "atomic_add", $T,
- (LLVMPtr{$T, AS.CrossWorkgroup}, $T),
+ "atomic_add", $T,
+ (LLVMPtr{$T, AS.CrossWorkgroup}, $T),
pointer(counter), val,
)
return
end
@testset "SPV_EXT_shader_atomic_float_add extension" begin
- a = OpenCL.zeros(T)
- @opencl global_size = 1000 extensions = ["SPV_EXT_shader_atomic_float_add"] atomic_float_add(a, one(T))
- @test OpenCL.@allowscalar a[] == T(1000.0)
+ a = OpenCL.zeros(T)
+ @opencl global_size = 1000 extensions = ["SPV_EXT_shader_atomic_float_add"] atomic_float_add(a, one(T))
+ @test OpenCL.@allowscalar a[] == T(1000.0)
spv = sprint() do io
- OpenCL.code_native(io, atomic_float_add, Tuple{CLDeviceArray{T, 0, 1}, T}; extensions = ["SPV_EXT_shader_atomic_float_add"])
+ OpenCL.code_native(io, atomic_float_add, Tuple{CLDeviceArray{T, 0, 1}, T}; extensions = ["SPV_EXT_shader_atomic_float_add"])
end
@test occursin("OpExtension \"SPV_EXT_shader_atomic_float_add\"", spv)
@test occursin("OpAtomicFAddEXT", spv) |
Codecov Report✅ All modified and coverable lines are covered by tests. Additional details and impacted files@@ Coverage Diff @@
## master #399 +/- ##
==========================================
+ Coverage 80.27% 80.68% +0.41%
==========================================
Files 12 12
Lines 730 730
==========================================
+ Hits 586 589 +3
+ Misses 144 141 -3 ☔ View full report in Codecov by Sentry. 🚀 New features to boost your workflow:
|
|
Bump. Can someone take a look? @vchuravy ? I don't see how the failing tests are connected to the changes here. |
|
Thanks. Can you add a test that works here too? |
1ac8383 to
e962893
Compare
|
@maleadt Added tests and rebased. |
simeonschaub
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The test can now be modified to actually call the atomic_add! method from SPIRVIntrinsics instead of defining its own function. It is also not exercising the Float64 path ATM, since Float32 is hard coded inside the test.
Ideally, we would also be testing the other atomic intrinsics that have been defined for Float32 and Float64 now. (Are the bitwise intrinsics like atomic_or! and friends even defined for floats?)
|
@simeonschaub I tried to add more tests, but except for Can we remove the |
|
A restful night resolved the issues. This should pass now. A few comments:
|
|
@simeonschaub @maleadt @vchuravy Bump. I don't think the errors are related to my code changes. |
8d36d26 to
58f3b27
Compare
simeonschaub
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry for taking so long to review! I took the liberty of improving some of the tests a little, hope that's ok
|
Of course! Who can merge? @vchuravy? |
JuliaGPU/oneAPI.jl#544 Adding the
SPV_EXT_shader_atomic_float_addextension in oneAPI.jl seems to work.