diff --git a/lib/intrinsics/Project.toml b/lib/intrinsics/Project.toml index ead792b9..338bb329 100644 --- a/lib/intrinsics/Project.toml +++ b/lib/intrinsics/Project.toml @@ -1,7 +1,7 @@ name = "SPIRVIntrinsics" uuid = "71d1d633-e7e8-4a92-83a1-de8814b09ba8" authors = ["Tim Besard "] -version = "0.5.5" +version = "0.5.6" [deps] ExprTools = "e2ba6199-217a-4e67-a87a-7c52f15ade04" diff --git a/lib/intrinsics/src/atomic.jl b/lib/intrinsics/src/atomic.jl index 9bbbdbe6..a93f926e 100644 --- a/lib/intrinsics/src/atomic.jl +++ b/lib/intrinsics/src/atomic.jl @@ -3,13 +3,15 @@ # provides atomic functions that rely on the OpenCL base atomics, as well as the # cl_khr_int64_base_atomics and cl_khr_int64_extended_atomics extensions. +const atomic_float_types = [Float32, Float64] const atomic_integer_types = [UInt32, Int32, UInt64, Int64] const atomic_memory_types = [AS.Workgroup, AS.CrossWorkgroup] +const atomic_types = vcat(atomic_float_types, atomic_integer_types) # generically typed -for gentype in atomic_integer_types, as in atomic_memory_types +for gentype in atomic_types, as in atomic_memory_types @eval begin @device_function atomic_add!(p::LLVMPtr{$gentype,$as}, val::$gentype) = @@ -45,15 +47,17 @@ for gentype in atomic_integer_types, as in atomic_memory_types @device_function atomic_xor!(p::LLVMPtr{$gentype,$as}, val::$gentype) = @builtin_ccall("atomic_xor", $gentype, (LLVMPtr{$gentype,$as}, $gentype), p, val) - -@device_function atomic_xchg!(p::LLVMPtr{$gentype,$as}, val::$gentype) = - @builtin_ccall("atomic_xchg", $gentype, - (LLVMPtr{$gentype,$as}, $gentype), p, val) - -@device_function atomic_cmpxchg!(p::LLVMPtr{$gentype,$as}, cmp::$gentype, val::$gentype) = - @builtin_ccall("atomic_cmpxchg", $gentype, - (LLVMPtr{$gentype,$as}, $gentype, $gentype), p, cmp, val) - +end +if gentype in atomic_integer_types + @eval begin + @device_function atomic_xchg!(p::LLVMPtr{$gentype,$as}, val::$gentype) = + @builtin_ccall("atomic_xchg", $gentype, + (LLVMPtr{$gentype,$as}, $gentype), p, val) + + @device_function atomic_cmpxchg!(p::LLVMPtr{$gentype,$as}, cmp::$gentype, val::$gentype) = + @builtin_ccall("atomic_cmpxchg", $gentype, + (LLVMPtr{$gentype,$as}, $gentype, $gentype), p, cmp, val) + end end end diff --git a/test/atomics.jl b/test/atomics.jl index f46a535b..71fefa8b 100644 --- a/test/atomics.jl +++ b/test/atomics.jl @@ -1,37 +1,123 @@ using SPIRVIntrinsics: @builtin_ccall, @typed_ccall, LLVMPtr, known_intrinsics -@testset "atomics" begin +# Define the types to test +integer_types = [Int32, UInt32, Int64, UInt64] +float_types = [Float32, Float64] +all_types = vcat(integer_types, float_types) + +dev = OpenCL.cl.device() -function atomic_count(counter) - OpenCL.@atomic counter[] += 1 +# Arithmetic operations +function test_atomic_add(counter::AbstractArray{T}) where T + OpenCL.@atomic counter[] += one(T) + return +end +function test_atomic_sub(counter::AbstractArray{T}) where T + OpenCL.@atomic counter[] -= one(T) + return +end +# Bitwise operations +function test_atomic_and(counter::AbstractArray{T}) where T + OpenCL.@atomic counter[] &= ~(one(T) << (get_global_id() - 1)) + return +end +function test_atomic_or(counter::AbstractArray{T}) where T + OpenCL.@atomic counter[] |= one(T) << (get_global_id() - 1) + return +end +function test_atomic_xor(counter::AbstractArray{T}) where T + OpenCL.@atomic counter[] ⊻= one(T) << ((get_global_id() - 1) % 32) + return +end +# Min/max operations - use low-level API directly +function test_atomic_max(counter::AbstractArray{T}) where T + OpenCL.atomic_max!(pointer(counter), T(get_global_id())) + return +end +function test_atomic_min(counter::AbstractArray{T}) where T + OpenCL.atomic_min!(pointer(counter), T(get_global_id())) + return +end +# Exchange operation - use low-level API directly +function test_atomic_xchg(counter::AbstractArray{T}) where T + OpenCL.atomic_xchg!(pointer(counter), one(T)) return end +# Compare-and-swap operation - use low-level API directly +function test_atomic_cas(counter::AbstractArray{T}) where T + OpenCL.atomic_cmpxchg!(pointer(counter), zero(T), one(T)) + return +end + +# Define atomic operations to test +atomic_operations = [ + # op, init_val, expected_val + (test_atomic_add, 0, 1000), + (test_atomic_sub, 1000, 0), + (test_atomic_and, typemax(UInt64), 0), + (test_atomic_or, 0, typemax(UInt64)), + (test_atomic_xor, 0, typemax(UInt32) << 8), + (test_atomic_max, 0, 1000), + (test_atomic_min, 1000, 1), + (test_atomic_xchg, 0, 1), + (test_atomic_cas, 0, 1), +] +@testset "atomics" begin +@testset "$kernel_func - $T" for (kernel_func, init_val, expected_val) in atomic_operations, T in all_types + # Skip Int64/UInt64 if not supported + if sizeof(T) == 8 && T <: Integer && !("cl_khr_int64_extended_atomics" in dev.extensions) + continue + end + + # Skip Float64 if not supported + if T == Float64 && !("cl_khr_fp64" in dev.extensions) + continue + end -@testset "atomic_add! ($T)" for T in [Int32, UInt32, Int64, UInt64] - if sizeof(T) == 4 || "cl_khr_int64_extended_atomics" in cl.device().extensions - a = OpenCL.zeros(T) - @opencl global_size=1000 atomic_count(a) - @test OpenCL.@allowscalar a[] == 1000 + # Bitwise operations (only valid for integers) + if kernel_func in [test_atomic_and, test_atomic_or, test_atomic_xor] && T <: AbstractFloat + continue end + + # Min/max operations (only supported for 32-bit integers in OpenCL) + if kernel_func in [test_atomic_min, test_atomic_max] && !(T in [Int32, UInt32]) + continue + end + + if T <: Integer + init_val %= T + expected_val %= T + end + + a = OpenCL.fill(T(init_val)) + @opencl global_size=1000 kernel_func(a) + result_val = OpenCL.@allowscalar a[] + @test result_val === T(expected_val) end + +@testset "atomic_add! ($T)" for T in [Float32, Float64] + # Float64 requires cl_khr_fp64 extension + if T == Float64 && !("cl_khr_fp64" in cl.device().extensions) + continue + end if "cl_ext_float_atomics" in cl.device().extensions - function atomic_float_add(counter, val) + @eval function atomic_float_add(counter, val::$T) @builtin_ccall( - "atomic_add", Float32, - (LLVMPtr{Float32, AS.CrossWorkgroup}, Float32), + "atomic_add", $T, + (LLVMPtr{$T, AS.CrossWorkgroup}, $T), pointer(counter), val, ) return end @testset "SPV_EXT_shader_atomic_float_add extension" begin - a = OpenCL.zeros(Float32) - @opencl global_size = 1000 extensions = ["SPV_EXT_shader_atomic_float_add"] atomic_float_add(a, 1.0f0) - @test OpenCL.@allowscalar a[] == 1000.0f0 + a = OpenCL.zeros(T) + @opencl global_size = 1000 extensions = ["SPV_EXT_shader_atomic_float_add"] atomic_float_add(a, one(T)) + @test OpenCL.@allowscalar a[] == T(1000.0) spv = sprint() do io - OpenCL.code_native(io, atomic_float_add, Tuple{CLDeviceArray{Float32, 0, 1}, Float32}; extensions = ["SPV_EXT_shader_atomic_float_add"]) + OpenCL.code_native(io, atomic_float_add, Tuple{CLDeviceArray{T, 0, 1}, T}; extensions = ["SPV_EXT_shader_atomic_float_add"]) end @test occursin("OpExtension \"SPV_EXT_shader_atomic_float_add\"", spv) @test occursin("OpAtomicFAddEXT", spv) @@ -39,3 +125,4 @@ end end end +end